(linenum→info "unix/slp.c:2238")

anthy/9100e/src-main/context.c

    1: /*
    2:  * 変換や文節の伸縮などの操作が進行中の文字列や候補などを
    3:  * まとめて変換コンテキストと呼ぶ。
    4:  * Anthyのコンテキストに対する操作は全てここから呼ばれる。
    5:  * 各操作に対して変換パイプラインの必要なモジュールを順に呼びだす。
    6:  *
    7:  * personalityの管理もする。
    8:  *
    9:  * Funded by IPA未踏ソフトウェア創造事業 2001 10/29
   10:  * Copyright (C) 2000-2007 TABATA Yusuke
   11:  *
   12:  * $Id: context.c,v 1.26 2002/11/17 14:45:47 yusuke Exp $
   13:  */
   14: /*
   15:   This library is free software; you can redistribute it and/or
   16:   modify it under the terms of the GNU Lesser General Public
   17:   License as published by the Free Software Foundation; either
   18:   version 2 of the License, or (at your option) any later version.
   19: 
   20:   This library is distributed in the hope that it will be useful,
   21:   but WITHOUT ANY WARRANTY; without even the implied warranty of
   22:   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   23:   Lesser General Public License for more details.
   24: 
   25:   You should have received a copy of the GNU Lesser General Public
   26:   License along with this library; if not, write to the Free Software
   27:   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
   28:  */
   29: #include <sys/types.h>
   30: #include <sys/stat.h>
   31: #include <unistd.h>
   32: 
   33: #include <stdlib.h>
   34: #include <stdio.h>
   35: #include <string.h>
   36: 
   37: #include <anthy/anthy.h>
   38: #include <anthy/alloc.h>
   39: #include <anthy/record.h>
   40: #include <anthy/ordering.h>
   41: #include <anthy/splitter.h>
   42: #include <anthy/xstr.h>
   43: #include "main.h"
   44: 
   45: /* Allocator from which every struct anthy_context is obtained (created in anthy_init_contexts) */
   46: static allocator context_ator;
   47: 
   48: /** Current personality.
   49:  * while unset: NULL
   50:  * when a conversion is started while still unset: "default"
   51:  * for the anonymous personality: ""
   52:  */
   53: static char *current_personality;
   54: 
   55: /* Size limit compared against st_size: anthy_save_history() writes nothing once the history file is larger than this */
   56: #define HISTORY_FILE_LIMIT 100000
   57: 
   58: static void
   59: context_dtor(void *p)
   60: {
   61:   anthy_do_reset_context((struct anthy_context *)p);
   62: }
   63: 
   64: /** 現在のpersonalityを返す */
   65: static char *
   66: get_personality(void)
   67: {
   68:   if (!current_personality) {
   69:     current_personality = strdup("default");
   70:     anthy_dic_set_personality(current_personality);
   71:   }
   72:   return current_personality;
   73: }
   74: 
   75: static void
   76: release_segment(struct seg_ent *s)
   77: {
   78:   if (s->cands) {
   79:     int i;
   80:     for (i = 0; i < s->nr_cands; i++) {
   81:       anthy_release_cand_ent(s->cands[i]);
   82:     }
   83:     free (s->cands);
   84:   }
   85:   if (s->mw_array) {
   86:     free(s->mw_array);
   87:   }
   88:   free(s);
   89:   
   90: }
   91: 
   92: /** 文節リストの最後の要素を削除する */
   93: static void
   94: pop_back_seg_ent(struct anthy_context *c)
   95: {
   96:   struct seg_ent *s;
   97:   s = c->seg_list.list_head.prev;
   98:   if (s == &c->seg_list.list_head) {
   99:     return ;
  100:   }
  101:   s->prev->next = s->next;
  102:   s->next->prev = s->prev;
  103:   release_segment(s);
  104:   c->seg_list.nr_segments --;
  105: }
  106: 
  107: 
  108: /** n番目の文節の文字のindexを求める */
  109: static int
  110: get_nth_segment_index(struct anthy_context *c, int n)
  111: {
  112:   int i,s;
  113:   for (i = 0, s = 0; i < c->str.len; i++) {
  114:     if (c->split_info.ce[i].seg_border) {
  115:       if (s == n) {
  116:         return i;
  117:       }
  118:       s++;
  119:     }
  120:   }
  121:   return -1;
  122: }
  123: 
  124: /** n番目の文節の長さを求める.
  125:  * segment_listが構成されていなくても計算できるようにする.
  126:  */
  127: static int
  128: get_nth_segment_len(struct anthy_context *c, int sindex)
  129: {
  130:   int a,i,l;
  131:   a = get_nth_segment_index(c, sindex);
  132:   if ( a == -1){
  133:     return -1;
  134:   }
  135:   l = 1;
  136:   for (i = a+1; !c->split_info.ce[i].seg_border; i++) {
  137:     l++;
  138:   }
  139:   return l;
  140: }
  141: 
  142: /** metawordの配列を作る */
  143: static void
  144: make_metaword_array(struct anthy_context *ac,
  145:                     struct seg_ent *se)
  146: {
  147:   int i;
  148:   se->mw_array = NULL;
  149:   for (i = se->len; i > 0; i--) {
  150:     int j;
  151:     /* 最後に濁点とかがついてたら直前の文字ごと落す */
  152:     if (i < se->len &&
  153:         anthy_get_xchar_type(se->str.str[i]) & XCT_PART) {
  154:       /* FIXME 濁点とかがありえない並びをしてたら */
  155:       i--;
  156:       continue ;
  157:     }
  158: 
  159:     se->nr_metaword = anthy_get_nr_metaword(&ac->split_info, se->from, i);
  160:     if (!se->nr_metaword) {
  161:       continue ;
  162:     }
  163:     /* metawordを配列に取り込む */
  164:     se->mw_array = malloc(sizeof(struct meta_word*) * se->nr_metaword);
  165:     for (j = 0; j < se->nr_metaword; j++) {
  166:       se->mw_array[j] = anthy_get_nth_metaword(&ac->split_info, se->from, i, j);
  167:     }
  168:     return;
  169:   }
  170: }
  171: 
  172: static struct seg_ent*
  173: create_segment(struct anthy_context *ac, int from, int len,
  174:                struct meta_word* best_mw)
  175: {
  176:   struct seg_ent* s;
  177:   s = (struct seg_ent *)malloc(sizeof(struct seg_ent));
  178:   s->str.str = &ac->str.str[from];
  179:   s->str.len = len;
  180:   s->from = from;
  181:   s->len = s->str.len;
  182:   s->nr_cands = 0;
  183:   s->cands = NULL;
  184:   s->best_seg_class = ac->split_info.ce[from].best_seg_class;
  185:   s->best_mw = best_mw;
  186:   make_metaword_array(ac, s);
  187:   return s;
  188: }
  189: 
  190: /** 変換コンテキストに文節を追加する */
  191: static void
  192: push_back_segment(struct anthy_context *ac, struct seg_ent *se)
  193: {
  194:   se->next = &ac->seg_list.list_head;
  195:   se->prev = ac->seg_list.list_head.prev;
  196:   ac->seg_list.list_head.prev->next = se;
  197:   ac->seg_list.list_head.prev = se;
  198:   ac->seg_list.nr_segments ++;
  199:   se->committed = -1;
  200: }
  201: 
  202: /** splitterによって配列中に付けられた文節境界のマークから、
  203:  * 文節のリストを構成する
  204:  */
  205: static void
  206: create_segment_list(struct anthy_context *ac, int from, int to)
  207: {
  208:   int i, n;
  209:   struct seg_ent *s;
  210:   /* from の所までにいくつの文節があるか調べる */
  211:   i = 0; n = 0;
  212:   while (i < from) {
  213:     i += get_nth_segment_len(ac, n);
  214:     n++;
  215:   };
  216:   /**/
  217:   for (i = from; i < to; i++) {
  218:     if (ac->split_info.ce[i].seg_border) {
  219:       int len = get_nth_segment_len(ac, n);
  220:       s = create_segment(ac, i, len, ac->split_info.ce[i].best_mw);
  221: 
  222:       push_back_segment(ac, s);
  223:       n++;
  224:     }
  225:   }
  226: }
  227: 
  228: /** コンテキストを作る */
  229: struct anthy_context *
  230: anthy_do_create_context(int encoding)
  231: {
  232:   struct anthy_context *ac;
  233:   char *p = get_personality();
  234: 
  235:   if (!p) {
  236:     return NULL;
  237:   }
  238: 
  239:   ac = (struct anthy_context *)anthy_smalloc(context_ator);
  240:   ac->str.str = NULL;
  241:   ac->str.len = 0;
  242:   ac->seg_list.nr_segments = 0;
  243:   ac->seg_list.list_head.prev = &ac->seg_list.list_head;
  244:   ac->seg_list.list_head.next = &ac->seg_list.list_head;
  245:   ac->split_info.word_split_info = NULL;
  246:   ac->split_info.ce = NULL;
  247:   ac->ordering_info.oc = NULL;
  248:   ac->dic_session = NULL;
  249:   ac->prediction.str.str = NULL;
  250:   ac->prediction.str.len = 0;
  251:   ac->prediction.nr_prediction = 0;
  252:   ac->prediction.predictions = NULL;
  253:   ac->encoding = encoding;
  254:   ac->reconversion_mode = ANTHY_RECONVERT_AUTO;
  255: 
  256:   return ac;
  257: }
  258: 
  259: /** コンテキストのアロケータを作る */
  260: void
  261: anthy_init_contexts(void)
  262: {
  263:   context_ator = anthy_create_allocator(sizeof(struct anthy_context),
  264:                                         context_dtor);
  265: }
Permalink to this note knok: anthy/9100e/src-main/context.c:260-265 on Thu Feb 28 17:58:16 +0900 2008

コンテキストの初期化?

266: 267: void 268: anthy_quit_contexts(void) 269: { 270: anthy_free_allocator(context_ator); 271: } 272: 273: static void 274: release_prediction(struct prediction_cache *pc) 275: { 276: int i; 277: if (pc->str.str) { 278: free(pc->str.str); 279: pc->str.str = NULL; 280: } 281: if (pc->predictions) { 282: for (i = 0; i < pc->nr_prediction; ++i) { 283: anthy_free_xstr(pc->predictions[i].src_str); 284: anthy_free_xstr(pc->predictions[i].str); 285: } 286: free(pc->predictions); 287: pc->predictions = NULL; 288: } 289: } 290: 291: void 292: anthy_release_segment_list(struct anthy_context *ac) 293: { 294: int i, sc; 295: sc = ac->seg_list.nr_segments; 296: for (i = 0; i < sc; i++) { 297: pop_back_seg_ent(ac); 298: } 299: ac->seg_list.nr_segments = 0; 300: } 301: 302: /* resetではcontextのために確保されたリソースを全て解放する */ 303: void 304: anthy_do_reset_context(struct anthy_context *ac) 305: { 306: /* まず辞書セッションを解放 */ 307: if (ac->dic_session) { 308: anthy_dic_release_session(ac->dic_session); 309: ac->dic_session = NULL; 310: } 311: if (!ac->str.str) { 312: /* 文字列が設定されていなければ解放すべき物はもう無い */ 313: return ; 314: } 315: free(ac->str.str); 316: ac->str.str = NULL; 317: anthy_release_split_context(&ac->split_info); 318: anthy_release_segment_list(ac); 319: 320: /* 予測された文字列の解放 */ 321: release_prediction(&ac->prediction); 322: } 323: 324: void 325: anthy_do_release_context(struct anthy_context *ac) 326: { 327: anthy_sfree(context_ator, ac); 328: } 329: 330: static void 331: make_candidates(struct anthy_context *ac, int from, int from2, int is_reverse) 332: { 333: int i; 334: int len = ac->str.len; 335: 336: /* 文節の境界を設定 */ 337: /* from と from2の間に境界を作ることを禁止する */ 338: anthy_mark_border(&ac->split_info, from, from2, len); 339: create_segment_list(ac, from, len); 340: anthy_sort_metaword(&ac->seg_list); 341: 342: /* 候補を列挙 */ 343: for (i = 0; i < ac->seg_list.nr_segments; i++) { 344: anthy_do_make_candidates(&ac->split_info, 345: anthy_get_nth_segment(&ac->seg_list, i), 346: is_reverse); 347: } 348: 
/* 候補をソート */ 349: anthy_sort_candidate(&ac->seg_list, 0); 350: } 351: 352: int 353: anthy_do_context_set_str(struct anthy_context *ac, xstr *s, int is_reverse) 354: { 355: int i; 356: 357: /* 文字列をコピー(一文字分余計にして0をセット) */ 358: ac->str.str = (xchar *)malloc(sizeof(xchar)*(s->len+1)); 359: anthy_xstrcpy(&ac->str, s); 360: ac->str.str[s->len] = 0; 361: 362: /* splitterの初期化*/ 363: anthy_init_split_context(&ac->str, &ac->split_info, is_reverse); 364: 365: /* 解の候補を作成 */ 366: make_candidates(ac, 0, 0, is_reverse); 367: 368: /* 最初に設定した文節境界を覚えておく */ 369: for (i = 0; i < ac->seg_list.nr_segments; i++) { 370: struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i); 371: ac->split_info.ce[s->from].initial_seg_len = s->len; 372: } 373: 374: return 0; 375: } 376: 377: void 378: anthy_do_resize_segment(struct anthy_context *ac, 379: int nth, int resize) 380: { 381: int i; 382: int index, len, sc; 383: 384: /* resizeが可能か検査する */ 385: if (nth >= ac->seg_list.nr_segments) { 386: return ; 387: } 388: index = get_nth_segment_index(ac, nth); 389: len = get_nth_segment_len(ac, nth); 390: if (index + len + resize > ac->str.len) { 391: return ; 392: } 393: if (len + resize < 1) { 394: return ; 395: } 396: 397: /* nth以降のseg_entを解放する */ 398: sc = ac->seg_list.nr_segments; 399: for (i = nth; i < sc; i++) { 400: pop_back_seg_ent(ac); 401: } 402: 403: /* resizeしたseg_borderをマークする */ 404: /* 現在のマークを消して新しいマークをつける */ 405: ac->split_info.ce[index+len].seg_border = 0; 406: ac->split_info.ce[ac->str.len].seg_border = 1; 407: for (i = index+len+resize+1; i < ac->str.len; i++) { 408: ac->split_info.ce[i].seg_border = 0; 409: } 410: ac->split_info.ce[index+len+resize].seg_border = 1; 411: for (i = index; i < ac->str.len; i++) { 412: ac->split_info.ce[i].best_mw = NULL; 413: } 414: 415: /* 解の候補を作成 */ 416: make_candidates(ac, index, index+len+resize, 0); 417: } 418: 419: /* 420: * n番めの文節を取得する、無い場合にはNULLを返す 421: */ 422: struct seg_ent * 423: anthy_get_nth_segment(struct segment_list *sl, int n) 424: { 425: 
int i; 426: struct seg_ent *se; 427: if (n >= sl->nr_segments || 428: n < 0) { 429: return NULL; 430: } 431: for (i = 0, se = sl->list_head.next; i < n; i++, se = se->next); 432: return se; 433: } 434: 435: int 436: anthy_do_set_prediction_str(struct anthy_context *ac, xstr* xs) 437: { 438: struct prediction_cache* prediction = &ac->prediction; 439: int nr_prediction; 440: 441: /* まず辞書セッションを解放 */ 442: if (ac->dic_session) { 443: anthy_dic_release_session(ac->dic_session); 444: ac->dic_session = NULL; 445: } 446: /* 予測された文字列の解放 */ 447: release_prediction(&ac->prediction); 448: 449: /* 辞書セッションの開始 */ 450: if (!ac->dic_session) { 451: ac->dic_session = anthy_dic_create_session(); 452: if (!ac->dic_session) { 453: return -1; 454: } 455: } 456: 457: prediction->str.str = (xchar*)malloc(sizeof(xchar*)*(xs->len+1)); 458: anthy_xstrcpy(&prediction->str, xs); 459: prediction->str.str[xs->len]=0; 460: 461: nr_prediction = anthy_traverse_record_for_prediction(xs, NULL); 462: prediction->nr_prediction = nr_prediction; 463: 464: if (nr_prediction) { 465: prediction->predictions = (struct prediction_t*)malloc(sizeof(struct prediction_t) * 466: nr_prediction); 467: anthy_traverse_record_for_prediction(xs, prediction->predictions); 468: } 469: return 0; 470: } 471: 472: static const char * 473: get_change_state(struct anthy_context *ac) 474: { 475: int resize = 0, cand_change = 0; 476: int i; 477: for (i = 0; i < ac->seg_list.nr_segments; i++) { 478: struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i); 479: if (ac->split_info.ce[s->from].initial_seg_len != s->len) { 480: resize = 1; 481: } 482: if (s->committed > 0) { 483: cand_change = 1; 484: } 485: } 486: /**/ 487: if (resize && cand_change) { 488: return "SC"; 489: } 490: if (resize) { 491: return "S"; 492: } 493: if (cand_change) { 494: return "C"; 495: } 496: return "-"; 497: } 498: 499: static void 500: write_history(FILE *fp, struct anthy_context *ac) 501: { 502: int i; 503: /* 読み */ 504: fprintf(fp, "|"); 505: for 
(i = 0; i < ac->seg_list.nr_segments; i++) { 506: struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i); 507: char *c = anthy_xstr_to_cstr(&s->str, ANTHY_EUC_JP_ENCODING); 508: fprintf(fp, "%s|", c); 509: free(c); 510: } 511: fprintf(fp, " |"); 512: /* 結果 */ 513: for (i = 0; i < ac->seg_list.nr_segments; i++) { 514: struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i); 515: char *c; 516: /**/ 517: if (s->committed < 0) { 518: fprintf(fp, "?|"); 519: continue ; 520: } 521: c = anthy_xstr_to_cstr(&s->cands[s->committed]->str, 522: ANTHY_EUC_JP_ENCODING); 523: fprintf(fp, "%s|", c); 524: free(c); 525: } 526: } 527: 528: void 529: anthy_save_history(const char *fn, struct anthy_context *ac) 530: { 531: FILE *fp; 532: struct stat st; 533: if (!fn) { 534: return ; 535: } 536: fp = fopen(fn, "a"); 537: if (!fp) { 538: return ; 539: } 540: if (stat(fn, &st) || 541: st.st_size > HISTORY_FILE_LIMIT) { 542: fclose(fp); 543: return ; 544: } 545: /**/ 546: fprintf(fp, "anthy-%s ", anthy_get_version_string()); 547: fprintf(fp, "%s ", get_change_state(ac)); 548: write_history(fp, ac); 549: fprintf(fp, "\n"); 550: fclose(fp); 551: /**/ 552: chmod(fn, S_IREAD | S_IWRITE); 553: } 554: 555: /** 候補を表示する */ 556: void 557: anthy_print_candidate(struct cand_ent *ce) 558: { 559: int mod = (ce->score % 1000); 560: int seg_score = 0; 561: 562: if (ce->mw) { 56