(linenum→info "unix/slp.c:2238")

anthy/9100e/src-ordering/candhistory.c

    1: /*
    2:  * 候補の履歴を覚える
    3:  *
    4:  *
    5:  * ある読みの履歴が 候補A 候補B 候補A 候補A 候補A
    6:  * であったというような情報をもとに候補のスコアを加点する。
    7:  *
    8:  * Copyright (C) 2006-2007 TABATA Yusuke
    9:  *
   10:  */
   11: #include <stdlib.h>
   12: 
   13: #include <anthy/segment.h>
   14: #include <anthy/record.h>
   15: #include "sorter.h"
   16: 
   17: #define HISTORY_DEPTH 8
   18: #define MAX_HISTORY_ENTRY 200
   19: 
   20: /** 文節のコミットを履歴に追加する */
   21: static void
   22: learn_cand_history(struct seg_ent *seg)
   23: {
   24:   int nr, i;
   25: 
   26:   if (anthy_select_section("CAND_HISTORY", 1)) {
   27:     return ;
   28:   }
   29:   if (anthy_select_row(&seg->str, 1)) {
   30:     return ;
   31:   }
   32:   /* シフトする */
   33:   nr = anthy_get_nr_values();
   34:   nr ++;
   35:   if (nr > HISTORY_DEPTH) {
   36:     nr = HISTORY_DEPTH;
   37:   }
   38:   for (i = nr - 1; i > 0; i--) {
   39:     xstr *xs = anthy_get_nth_xstr(i - 1);
   40:     anthy_set_nth_xstr(i, xs);
   41:   }
   42:   /* 0番目に設定 */
   43:   anthy_set_nth_xstr(0, &seg->cands[seg->committed]->str);
   44:   anthy_mark_row_used();
   45: }
   46: 
   47: static void
   48: learn_suffix_history(struct seg_ent *seg)
   49: {
   50:   int i;
   51:   struct cand_ent *cand = seg->cands[seg->committed];
   52:   if (anthy_select_section("SUFFIX_HISTORY", 1)) {
   53:     return ;
   54:   }
   55:   for (i = 0; i < cand->nr_words; i++) {
   56:     struct cand_elm *elm = &cand->elm[i];
   57:     xstr xs;
   58:     if (elm->nth == -1) {
   59:       continue;
   60:     }
   61:     if (anthy_wtype_get_pos(elm->wt) != POS_SUC) {
   62:       continue;
   63:     }
   64:     if (anthy_select_row(&elm->str, 1)) {
   65:       continue;
   66:     }
   67:     if (anthy_get_nth_dic_ent_str(elm->se, &elm->str, elm->nth, &xs)) {
   68:       continue;
   69:     }
   70:     anthy_set_nth_xstr(0, &xs);
   71:     free(xs.str);
   72:   }
   73: }
   74: 
   75: /** 外から呼ばれる関数 
   76:  * 履歴に追加する */
   77: void
   78: anthy_learn_cand_history(struct segment_list *sl)
   79: {
   80:   int i, nr = 0;
   81:   for (i = 0; i < sl->nr_segments; i++) {
   82:     struct seg_ent *seg = anthy_get_nth_segment(sl, i);
   83:     xstr *xs = &seg->str;
   84:     if (seg->committed < 0) {
   85:       continue;
   86:     }
   87:     if (anthy_select_row(xs, 0)) {
   88:       if (seg->committed == 0) {
   89:         /* 候補のエントリが無くて、コミットされた候補も先頭のものであればパス */
   90:         continue;
   91:       }
   92:     }
   93:     /**/
   94:     learn_cand_history(seg);
   95:     learn_suffix_history(seg);
   96:     nr ++;
   97:   }
   98:   if (nr > 0) {
   99:     if (!anthy_select_section("CAND_HISTORY", 1)) {
  100:       anthy_truncate_section(MAX_HISTORY_ENTRY);
  101:     }
  102:     if (!anthy_select_section("SUFFIX_HISTORY", 1)) {
  103:       anthy_truncate_section(MAX_HISTORY_ENTRY);
  104:     }
  105:   }
  106: }
  107: 
  108: /* 履歴をみて候補の重みを計算する */
  109: static int
  110: get_history_weight(xstr *xs)
  111: {
  112:   int i, nr = anthy_get_nr_values();
  113:   int w = 0;
  114:   for (i = 0; i < nr; i++) {
  115:     xstr *h = anthy_get_nth_xstr(i);
  116:     if (!h) {
  117:       continue;
  118:     }
  119:     if (!anthy_xstrcmp(xs, h)) {
  120:       w++;
  121:       if (i == 0) {
  122:         /* 直前に確定されたものには高いスコア*/
  123:         w += (HISTORY_DEPTH / 2);
  124:       }
  125:     }
  126:   }
  127:   return w;
  128: }
  129: 
  130: static void
  131: reorder_by_candidate(struct seg_ent *se)
  132: {
  133:   int i, primary_score;
  134:   /**/
  135:   if (anthy_select_section("CAND_HISTORY", 1)) {
  136:     return ;
  137:   }
  138:   if (anthy_select_row(&se->str, 0)) {
  139:     return ;
  140:   }
  141:   /* 最も評価の高い候補 */
  142:   primary_score = se->cands[0]->score;
  143:   /**/
  144:   for (i = 0; i < se->nr_cands; i++) {
  145:     struct cand_ent *ce = se->cands[i];
  146:     int weight = get_history_weight(&ce->str);
  147:     ce->score += primary_score / (HISTORY_DEPTH /2) * weight;
  148:   }
  149:   anthy_mark_row_used();
  150: }
  151: 
  152: /* 接尾辞の学習を適用する */
  153: static void
  154: reorder_by_suffix(struct seg_ent *se)
  155: {
  156:   int i, j;
  157:   int delta = 0;
  158:   int top_cand = -1;
  159:   if (anthy_select_section("SUFFIX_HISTORY", 0)) {
  160:     return ;
  161:   }
  162:   /* 各候補 */
  163:   for (i = 0; i < se->nr_cands; i++) {
  164:     struct cand_ent *ce = se->cands[i];
  165:     /* 候補を構成する各単語 */
  166:     for (j = 0; j < ce->nr_words; j++) {
  167:       struct cand_elm *elm = &ce->elm[j];
  168:       xstr xs;
  169:       if (elm->nth == -1) {
  170:         continue;
  171:       }
  172:       if (anthy_wtype_get_pos(elm->wt) != POS_SUC) {
  173:         continue;
  174:       }
  175:       /* 変換元の文字列をキーに検索 */
  176:       if (anthy_select_row(&elm->str, 0)) {
  177:         continue;
  178:       }
  179:       /* 変換後の文字列を取得 */
  180:       if (anthy_get_nth_dic_ent_str(elm->se, &elm->str, elm->nth, &xs)) {
  181:         continue;
  182:       }
  183:       /* 履歴中の文字列と比較する */
  184:       if (anthy_xstrcmp(&xs, anthy_get_nth_xstr(0))) {
  185:         free(xs.str);
  186:         continue;
  187:       }
  188:       /**/
  189:       if (top_cand < 0) {
  190:         top_cand = i;
  191:       }
  192:       if (delta == 0) {
  193:         delta = (se->cands[top_cand]->score - ce->score) + 1;
  194:       }
  195:       ce->score += delta;
  196:       free(xs.str);
  197:     }
  198:   }
  199: }
  200: 
  201: /* 履歴で加点する */
  202: void
  203: anthy_reorder_candidates_by_history(struct seg_ent *se)
  204: {
  205:   reorder_by_candidate(se);
  206:   reorder_by_suffix(se);
  207: }
Syntax (Markdown)