(linenum→info "unix/slp.c:2238")

anthy/9100e/src-worddic/feature_set.c

    1: /* features
    2:  *
    3:  * 素性の番号と意味を隠蔽して管理する
    4:  *
    5:  * Copyright (C) 2006-2007 TABATA Yusuke
    6:  *
    7:  */
    8: /*
    9:   This library is free software; you can redistribute it and/or
   10:   modify it under the terms of the GNU Lesser General Public
   11:   License as published by the Free Software Foundation; either
   12:   version 2 of the License, or (at your option) any later version.
   13: 
   14:   This library is distributed in the hope that it will be useful,
   15:   but WITHOUT ANY WARRANTY; without even the implied warranty of
   16:   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   17:   Lesser General Public License for more details.
   18: 
   19:   You should have received a copy of the GNU Lesser General Public
   20:   License along with this library; if not, write to the Free Software
   21:   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
   22:  */
   23: #include <stdio.h>
   24: #include <stdlib.h>
   25: #include <string.h>
   26: #include <arpa/inet.h>
   27: #include <anthy/segclass.h>
   28: #include <anthy/feature_set.h>
   29: /* for MW_FEATURE* constants */
   30: #include <anthy/splitter.h>
   31: 
   32: /* 素性の番号 
   33:  *
   34:  * 0-19 クラス素性
   35:  * 30-319(30+SEG_SIZE^2) クラス遷移属性
   36:  * 540-579 その他
   37:  * 580- (1024個) 付属語の種類
   38:  */
   39: 
   40: #define CUR_CLASS_BASE 0
   41: #define DEP_TYPE_FEATURE_BASE 20
   42: #define CLASS_TRANS_BASE 30
   43: #define FEATURE_SV 542
   44: #define FEATURE_WEAK 543
   45: #define FEATURE_SUFFIX 544
   46: #define FEATURE_NUM 546
   47: #define FEATURE_CORE1 547
   48: #define FEATURE_HIGH_FREQ 548
   49: #define FEATURE_WEAK_SEQ 549
   50: #define COS_BASE 573
   51: #define DEP_FEATURE_BASE 580
   52: 
   53: void
   54: anthy_feature_list_init(struct feature_list *fl)
   55: {
   56:   fl->nr = 0;
   57:   fl->size = NR_EM_FEATURES;
   58: }
   59: 
   60: void
   61: anthy_feature_list_free(struct feature_list *fl)
   62: {
   63:   (void)fl;
   64: }
   65: 
   66: void
   67: anthy_feature_list_add(struct feature_list *fl, int f)
   68: {
   69:   if (fl->nr < NR_EM_FEATURES) {
   70:     fl->u.index[fl->nr] = f;
   71:     fl->nr++;
   72:   }
   73: }
   74: 
   75: int
   76: anthy_feature_list_nr(const struct feature_list *fl)
   77: {
   78:   return fl->nr;
   79: }
   80: 
   81: int
   82: anthy_feature_list_nth(const struct feature_list *fl, int nth)
   83: {
   84:   return fl->u.index[nth];
   85: }
   86: 
   87: static int
   88: cmp_short(const void *p1, const void *p2)
   89: {
   90:   return *((short *)p1) - *((short *)p2);
   91: }
   92: 
   93: void
   94: anthy_feature_list_sort(struct feature_list *fl)
   95: {
   96:   qsort(fl->u.index, fl->nr, sizeof(fl->u.index[0]),
   97:         cmp_short);
   98: }
   99: 
  100: 
  101: void
  102: anthy_feature_list_set_cur_class(struct feature_list *fl, int cl)
  103: {
  104:   anthy_feature_list_add(fl, CUR_CLASS_BASE + cl);
  105: }
  106: 
  107: void
  108: anthy_feature_list_set_class_trans(struct feature_list *fl, int pc, int cc)
  109: {
  110:   anthy_feature_list_add(fl, CLASS_TRANS_BASE + pc * SEG_SIZE + cc);
  111: }
  112: 
  113: void
  114: anthy_feature_list_set_dep_word(struct feature_list *fl, int h)
  115: {
  116:   anthy_feature_list_add(fl, h + DEP_FEATURE_BASE);
  117: }
  118: 
  119: void
  120: anthy_feature_list_set_dep_class(struct feature_list *fl, int c)
  121: {
  122:   anthy_feature_list_add(fl, c + DEP_TYPE_FEATURE_BASE);
  123: }
  124: 
  125: void
  126: anthy_feature_list_set_noun_cos(struct feature_list *fl, wtype_t wt)
  127: {
  128:   int c;
  129:   if (anthy_wtype_get_pos(wt) != POS_NOUN) {
  130:     return ;
  131:   }
  132:   c = anthy_wtype_get_cos(wt);
  133:   if (c == COS_SUFFIX) {
  134:     anthy_feature_list_add(fl, COS_BASE + c);
  135:   }
  136: }
  137: 
  138: void
  139: anthy_feature_list_set_mw_features(struct feature_list *fl, int mask)
  140: {
  141:   if (mask & MW_FEATURE_WEAK_CONN) {
  142:     anthy_feature_list_add(fl, FEATURE_WEAK);
  143:   }
  144:   if (mask & MW_FEATURE_SUFFIX) {
  145:     anthy_feature_list_add(fl, FEATURE_SUFFIX);
  146:   }
  147:   if (mask & MW_FEATURE_SV) {
  148:     anthy_feature_list_add(fl, FEATURE_SV);
  149:   }
  150:   if (mask & MW_FEATURE_NUM) {
  151:     anthy_feature_list_add(fl, FEATURE_NUM);
  152:   }
  153:   if (mask & MW_FEATURE_CORE1) {
  154:     anthy_feature_list_add(fl, FEATURE_CORE1);
  155:   }
  156:   if (mask & MW_FEATURE_HIGH_FREQ) {
  157:     anthy_feature_list_add(fl, FEATURE_HIGH_FREQ);
  158:   }
  159:   if (mask & MW_FEATURE_WEAK_SEQ) {
  160:     anthy_feature_list_add(fl, FEATURE_WEAK_SEQ);
  161:   }
  162: }
  163: 
  164: void
  165: anthy_feature_list_print(struct feature_list *fl)
  166: {
  167:   int i;
  168:   printf("features=");
  169:   for (i = 0; i < fl->nr; i++) {
  170:     if (i) {
  171:       printf(",");
  172:     }
  173:     printf("%d", fl->u.index[i]);
  174:   }
  175:   printf("\n");
  176: }
  177: 
  178: static int
  179: compare_line(const void *kp, const void *cp)
  180: {
  181:   const int *f = kp;
  182:   const struct feature_freq *c = cp;
  183:   int i;
  184:   for (i = 0; i < NR_EM_FEATURES; i++) {
  185:     if (f[i] != (int)ntohl(c->f[i])) {
  186:       return f[i] - ntohl(c->f[i]);
  187:     }
  188:   }
  189:   return 0;
  190: }
  191: 
  192: struct feature_freq *
  193: anthy_find_array_freq(const void *image, int *f, int nr,
  194:                       struct feature_freq *arg)
  195: {
  196:   struct feature_freq *res;
  197:   int nr_lines, i;
  198:   const int *array = (int *)image;
  199:   int n[NR_EM_FEATURES];
  200:   if (!image) {
  201:     return NULL;
  202:   }
  203:   /* コピーする */
  204:   for (i = 0; i < NR_EM_FEATURES; i++) {
  205:     if (i < nr) {
  206:       n[i] = f[i];
  207:     } else {
  208:       n[i] = 0;
  209:     }
  210:   }
  211:   /**/
  212:   nr_lines = ntohl(array[1]);
  213:   res = bsearch(n, &array[16], nr_lines,
  214:                 sizeof(struct feature_freq),
  215:                 compare_line);
  216:   if (!res) {
  217:     return NULL;
  218:   }
  219:   for (i = 0; i < NR_EM_FEATURES + 2; i++) {
  220:     arg->f[i] = ntohl(res->f[i]);
  221:   }
  222:   return arg;
  223: }
  224: 
  225: struct feature_freq *
  226: anthy_find_feature_freq(const void *image,
  227:                         const struct feature_list *fl,
  228:                         struct feature_freq *arg)
  229: {
  230:   int i, nr;
  231:   int f[NR_EM_FEATURES + 2];
  232: 
  233:   /* 配列にコピーする */
  234:   nr = anthy_feature_list_nr(fl);
  235:   for (i = 0; i < NR_EM_FEATURES + 2; i++) {
  236:     if (i < nr) {
  237:       f[i] = anthy_feature_list_nth(fl, i);
  238:     } else {
  239:       f[i] = 0;
  240:     }
  241:   }
  242:   return anthy_find_array_freq(image, f, NR_EM_FEATURES, arg);
  243: }
  244: 
  245: void
  246: anthy_init_features(void)
  247: {
  248: }
Syntax (Markdown)