(linenum→info "unix/slp.c:2238")

anthy/9100e/src-splitter/segclass.c

    1: #include <string.h>
    2: 
    3: #include <anthy/splitter.h>
    4: #include <anthy/wtype.h>
    5: #include <anthy/segclass.h>
    6: #include "wordborder.h"
    7: 
    8: static struct {
    9:   const char *name;
   10:   const char *sym;
   11: } seg_class_tab[] = {
   12:   {"文頭", "H"}, {"文末", "T"}, {"文節", "B"},
   13:   {"接続語", "C"}, {"名詞+格助詞", "Nk"}, {"名詞+終端", "Ne"},
   14:   {"動詞+付属語", "Vf"}, {"動詞+終端", "Ve"}, {"形容詞", "A"},
   15:   {"形容動詞", "AJV"},
   16:   {"連用修飾", "YM"}, {"連体修飾", "TM"},
   17:   {"名詞", "N"}, {"名詞+付属語", "Nf"}, {"名詞+連用", "Ny"},
   18:   {"動詞+連用", "Vy"},
   19:   {"動詞+連体", "Vt"},
   20:   {NULL, NULL}
   21: };
   22: 
   23: void
   24: anthy_set_seg_class(struct word_list* wl)
   25: {
   26:   int head_pos;
   27:   enum dep_class dc;
   28:   enum seg_class seg_class;
   29: 
   30:   if (!wl) return;
   31: 
   32:   head_pos = wl->head_pos;
   33:   dc = wl->part[PART_DEPWORD].dc;
   34:   seg_class = SEG_HEAD;
   35: 
   36:   if (wl->part[PART_CORE].len == 0) {
   37:     seg_class = SEG_BUNSETSU;
   38:   } else {
   39:     switch (head_pos) {
   40:     case POS_NOUN:
   41:     case POS_NUMBER:
   42:       /* BREAK THROUGH */
   43:     case POS_N2T:
   44:       if (dc == DEP_RAW) {
   45:         seg_class = SEG_MEISHI;
   46:       } else if (dc == DEP_END) {
   47:         seg_class = SEG_MEISHI_SHUTAN;
   48:       } else if (dc == DEP_RENYOU) {
   49:         seg_class = SEG_MEISHI_RENYOU;
   50:       } else if (dc == DEP_KAKUJOSHI) {
   51:         seg_class = SEG_MEISHI_KAKUJOSHI;
   52:       } else {
   53:         seg_class = SEG_MEISHI_FUZOKUGO;
   54:       }
   55:       break;
   56:     case POS_V:
   57:       if (dc == DEP_RAW) {
   58:         seg_class = SEG_BUNSETSU;
   59:       } else if (dc == DEP_END) {
   60:         seg_class = SEG_DOUSHI_SHUTAN;
   61:       } else if (dc == DEP_RENYOU) {
   62:         seg_class = SEG_DOUSHI_RENYOU;
   63:       } else if (dc == DEP_RENTAI) {
   64:         seg_class = SEG_DOUSHI_RENTAI;
   65:       } else {
   66:         seg_class = SEG_DOUSHI_FUZOKUGO;
   67:       }
   68:       break;
   69:     case POS_D2KY:
   70:       /* BREAK THROUGH */
   71:     case POS_A:
   72:       seg_class = SEG_KEIYOUSHI;
   73:       if (dc == DEP_RENYOU) {
   74:         seg_class = SEG_RENYOU_SHUSHOKU;
   75:       } else if (dc == DEP_RENTAI) {
   76:         seg_class = SEG_RENTAI_SHUSHOKU;
   77:       }
   78:       break;
   79:     case POS_AJV:
   80:       seg_class = SEG_KEIYOUDOUSHI;
   81:       if (dc == DEP_RENYOU) {
   82:         seg_class = SEG_RENYOU_SHUSHOKU;
   83:       } else if (dc == DEP_RENTAI) {
   84:         seg_class = SEG_RENTAI_SHUSHOKU;
   85:       }
   86:       break;
   87:     case POS_AV:
   88:       seg_class = SEG_RENYOU_SHUSHOKU;
   89:       break;
   90:     case POS_ME:
   91:       seg_class = SEG_RENTAI_SHUSHOKU;
   92:       break;
   93:     case POS_CONJ:
   94:       seg_class = SEG_SETSUZOKUGO;
   95:       break;
   96:     case POS_OPEN:
   97:       seg_class = SEG_BUNSETSU;
   98:       break;
   99:     case POS_CLOSE:
  100:       seg_class = SEG_BUNSETSU;
  101:       break;
  102:     default:
  103:       seg_class = SEG_MEISHI;
  104:       break;
  105:     }
  106:   }
  107:   wl->seg_class = seg_class;
  108: }
  109: 
  110: const char* anthy_seg_class_name(enum seg_class sc)
  111: {
  112:   return seg_class_tab[sc].name;
  113: }
  114: 
  115: const char* anthy_seg_class_sym(enum seg_class sc)
  116: {
  117:   return seg_class_tab[sc].sym;
  118: }
  119: 
  120: enum seg_class
  121: anthy_seg_class_by_name(const char *name)
  122: {
  123:   int i;
  124:   for (i = 0; seg_class_tab[i].name; i++) {
  125:     if (!strcmp(seg_class_tab[i].name, name)) {
  126:       return i;
  127:     }
  128:   }
  129:   return SEG_BUNSETSU;
  130: }
Syntax (Markdown)