
1: /* 2: * 変換や文節の伸縮などの操作が進行中の文字列や候補などを 3: * まとめて変換コンテキストと呼ぶ。 4: * Anthyのコンテキストに対する操作は全てここから呼ばれる。 5: * 各操作に対して変換パイプラインの必要なモジュールを順に呼びだす。 6: * 7: * personalityの管理もする。 8: * 9: * Funded by IPA未踏ソフトウェア創造事業 2001 10/29 10: * Copyright (C) 2000-2007 TABATA Yusuke 11: * 12: * $Id: context.c,v 1.26 2002/11/17 14:45:47 yusuke Exp $ 13: */ 14: /* 15: This library is free software; you can redistribute it and/or 16: modify it under the terms of the GNU Lesser General Public 17: License as published by the Free Software Foundation; either 18: version 2 of the License, or (at your option) any later version. 19: 20: This library is distributed in the hope that it will be useful, 21: but WITHOUT ANY WARRANTY; without even the implied warranty of 22: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23: Lesser General Public License for more details. 24: 25: You should have received a copy of the GNU Lesser General Public 26: License along with this library; if not, write to the Free Software 27: Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 28: */ 29: #include <sys/types.h> 30: #include <sys/stat.h> 31: #include <unistd.h> 32: 33: #include <stdlib.h> 34: #include <stdio.h> 35: #include <string.h> 36: 37: #include <anthy/anthy.h> 38: #include <anthy/alloc.h> 39: #include <anthy/record.h> 40: #include <anthy/ordering.h> 41: #include <anthy/splitter.h> 42: #include <anthy/xstr.h> 43: #include "main.h" 44: 45: /**/ 46: static allocator context_ator; 47: 48: /** 現在のpersonality 49: * 未設定時: null 50: * 未設定のまま変換を開始した場合: "default" 51: * anonymousの場合: "" 52: */ 53: static char *current_personality; 54: 55: /**/ 56: #define HISTORY_FILE_LIMIT 100000 57: 58: static void 59: context_dtor(void *p) 60: { 61: anthy_do_reset_context((struct anthy_context *)p); 62: } 63: 64: /** 現在のpersonalityを返す */ 65: static char * 66: get_personality(void) 67: { 68: if (!current_personality) { 69: current_personality = strdup("default"); 70: anthy_dic_set_personality(current_personality); 71: } 72: return current_personality; 73: } 74: 75: static void 76: release_segment(struct seg_ent *s) 77: { 78: if (s->cands) { 79: int i; 80: for (i = 0; i < s->nr_cands; i++) { 81: anthy_release_cand_ent(s->cands[i]); 82: } 83: free (s->cands); 84: } 85: if (s->mw_array) { 86: free(s->mw_array); 87: } 88: free(s); 89: 90: } 91: 92: /** 文節リストの最後の要素を削除する */ 93: static void 94: pop_back_seg_ent(struct anthy_context *c) 95: { 96: struct seg_ent *s; 97: s = c->seg_list.list_head.prev; 98: if (s == &c->seg_list.list_head) { 99: return ; 100: } 101: s->prev->next = s->next; 102: s->next->prev = s->prev; 103: release_segment(s); 104: c->seg_list.nr_segments --; 105: } 106: 107: 108: /** n番目の文節の文字のindexを求める */ 109: static int 110: get_nth_segment_index(struct anthy_context *c, int n) 111: { 112: int i,s; 113: for (i = 0, s = 0; i < c->str.len; i++) { 114: if (c->split_info.ce[i].seg_border) { 115: if (s == n) { 116: return i; 117: } 118: s++; 119: } 120: } 121: return -1; 122: } 123: 124: /** n番目の文節の長さを求める. 125: * segment_listが構成されていなくても計算できるようにする. 126: */ 127: static int 128: get_nth_segment_len(struct anthy_context *c, int sindex) 129: { 130: int a,i,l; 131: a = get_nth_segment_index(c, sindex); 132: if ( a == -1){ 133: return -1; 134: } 135: l = 1; 136: for (i = a+1; !c->split_info.ce[i].seg_border; i++) { 137: l++; 138: } 139: return l; 140: } 141: 142: /** metawordの配列を作る */ 143: static void 144: make_metaword_array(struct anthy_context *ac, 145: struct seg_ent *se) 146: { 147: int i; 148: se->mw_array = NULL; 149: for (i = se->len; i > 0; i--) { 150: int j; 151: /* 最後に濁点とかがついてたら直前の文字ごと落す */ 152: if (i < se->len && 153: anthy_get_xchar_type(se->str.str[i]) & XCT_PART) { 154: /* FIXME 濁点とかがありえない並びをしてたら */ 155: i--; 156: continue ; 157: } 158: 159: se->nr_metaword = anthy_get_nr_metaword(&ac->split_info, se->from, i); 160: if (!se->nr_metaword) { 161: continue ; 162: } 163: /* metawordを配列に取り込む */ 164: se->mw_array = malloc(sizeof(struct meta_word*) * se->nr_metaword); 165: for (j = 0; j < se->nr_metaword; j++) { 166: se->mw_array[j] = anthy_get_nth_metaword(&ac->split_info, se->from, i, j); 167: } 168: return; 169: } 170: } 171: 172: static struct seg_ent* 173: create_segment(struct anthy_context *ac, int from, int len, 174: struct meta_word* best_mw) 175: { 176: struct seg_ent* s; 177: s = (struct seg_ent *)malloc(sizeof(struct seg_ent)); 178: s->str.str = &ac->str.str[from]; 179: s->str.len = len; 180: s->from = from; 181: s->len = s->str.len; 182: s->nr_cands = 0; 183: s->cands = NULL; 184: s->best_seg_class = ac->split_info.ce[from].best_seg_class; 185: s->best_mw = best_mw; 186: make_metaword_array(ac, s); 187: return s; 188: } 189: 190: /** 変換コンテキストに文節を追加する */ 191: static void 192: push_back_segment(struct anthy_context *ac, struct seg_ent *se) 193: { 194: se->next = &ac->seg_list.list_head; 195: se->prev = ac->seg_list.list_head.prev; 196: ac->seg_list.list_head.prev->next = se; 197: ac->seg_list.list_head.prev = se; 198: ac->seg_list.nr_segments ++; 199: se->committed = -1; 200: } 201: 202: /** splitterによって配列中に付けられた文節境界のマークから、 203: * 文節のリストを構成する 204: */ 205: static void 206: create_segment_list(struct anthy_context *ac, int from, int to) 207: { 208: int i, n; 209: struct seg_ent *s; 210: /* from の所までにいくつの文節があるか調べる */ 211: i = 0; n = 0; 212: while (i < from) { 213: i += get_nth_segment_len(ac, n); 214: n++; 215: }; 216: /**/ 217: for (i = from; i < to; i++) { 218: if (ac->split_info.ce[i].seg_border) { 219: int len = get_nth_segment_len(ac, n); 220: s = create_segment(ac, i, len, ac->split_info.ce[i].best_mw); 221: 222: push_back_segment(ac, s); 223: n++; 224: } 225: } 226: } 227: 228: /** コンテキストを作る */ 229: struct anthy_context * 230: anthy_do_create_context(int encoding) 231: { 232: struct anthy_context *ac; 233: char *p = get_personality(); 234: 235: if (!p) { 236: return NULL; 237: } 238: 239: ac = (struct anthy_context *)anthy_smalloc(context_ator); 240: ac->str.str = NULL; 241: ac->str.len = 0; 242: ac->seg_list.nr_segments = 0; 243: ac->seg_list.list_head.prev = &ac->seg_list.list_head; 244: ac->seg_list.list_head.next = &ac->seg_list.list_head; 245: ac->split_info.word_split_info = NULL; 246: ac->split_info.ce = NULL; 247: ac->ordering_info.oc = NULL; 248: ac->dic_session = NULL; 249: ac->prediction.str.str = NULL; 250: ac->prediction.str.len = 0; 251: ac->prediction.nr_prediction = 0; 252: ac->prediction.predictions = NULL; 253: ac->encoding = encoding; 254: ac->reconversion_mode = ANTHY_RECONVERT_AUTO; 255: 256: return ac; 257: } 258: 259: /** コンテキストのアロケータを作る */ 260: void 261: anthy_init_contexts(void) 262: { 263: context_ator = anthy_create_allocator(sizeof(struct anthy_context), 264: context_dtor); 265: }knok: anthy/9100e/src-main/context.c:260-265 on Thu Feb 28 17:58:16 +0900 2008266: 267: void 268: anthy_quit_contexts(void) 269: { 270: anthy_free_allocator(context_ator); 271: } 272: 273: static void 274: release_prediction(struct prediction_cache *pc) 275: { 276: int i; 277: if (pc->str.str) { 278: free(pc->str.str); 279: pc->str.str = NULL; 280: } 281: if (pc->predictions) { 282: for (i = 0; i < pc->nr_prediction; ++i) { 283: anthy_free_xstr(pc->predictions[i].src_str); 284: anthy_free_xstr(pc->predictions[i].str); 285: } 286: free(pc->predictions); 287: pc->predictions = NULL; 288: } 289: } 290: 291: void 292: anthy_release_segment_list(struct anthy_context *ac) 293: { 294: int i, sc; 295: sc = ac->seg_list.nr_segments; 296: for (i = 0; i < sc; i++) { 297: pop_back_seg_ent(ac); 298: } 299: ac->seg_list.nr_segments = 0; 300: } 301: 302: /* resetではcontextのために確保されたリソースを全て解放する */ 303: void 304: anthy_do_reset_context(struct anthy_context *ac) 305: { 306: /* まず辞書セッションを解放 */ 307: if (ac->dic_session) { 308: anthy_dic_release_session(ac->dic_session); 309: ac->dic_session = NULL; 310: } 311: if (!ac->str.str) { 312: /* 文字列が設定されていなければ解放すべき物はもう無い */ 313: return ; 314: } 315: free(ac->str.str); 316: ac->str.str = NULL; 317: anthy_release_split_context(&ac->split_info); 318: anthy_release_segment_list(ac); 319: 320: /* 予測された文字列の解放 */ 321: release_prediction(&ac->prediction); 322: } 323: 324: void 325: anthy_do_release_context(struct anthy_context *ac) 326: { 327: anthy_sfree(context_ator, ac); 328: } 329: 330: static void 331: make_candidates(struct anthy_context *ac, int from, int from2, int is_reverse) 332: { 333: int i; 334: int len = ac->str.len; 335: 336: /* 文節の境界を設定 */ 337: /* from と from2の間に境界を作ることを禁止する */ 338: anthy_mark_border(&ac->split_info, from, from2, len); 339: create_segment_list(ac, from, len); 340: anthy_sort_metaword(&ac->seg_list); 341: 342: /* 候補を列挙 */ 343: for (i = 0; i < ac->seg_list.nr_segments; i++) { 344: anthy_do_make_candidates(&ac->split_info, 345: anthy_get_nth_segment(&ac->seg_list, i), 346: is_reverse); 347: } 348: /* 候補をソート */ 349: anthy_sort_candidate(&ac->seg_list, 0); 350: } 351: 352: int 353: anthy_do_context_set_str(struct anthy_context *ac, xstr *s, int is_reverse) 354: { 355: int i; 356: 357: /* 文字列をコピー(一文字分余計にして0をセット) */ 358: ac->str.str = (xchar *)malloc(sizeof(xchar)*(s->len+1)); 359: anthy_xstrcpy(&ac->str, s); 360: ac->str.str[s->len] = 0; 361: 362: /* splitterの初期化*/ 363: anthy_init_split_context(&ac->str, &ac->split_info, is_reverse); 364: 365: /* 解の候補を作成 */ 366: make_candidates(ac, 0, 0, is_reverse); 367: 368: /* 最初に設定した文節境界を覚えておく */ 369: for (i = 0; i < ac->seg_list.nr_segments; i++) { 370: struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i); 371: ac->split_info.ce[s->from].initial_seg_len = s->len; 372: } 373: 374: return 0; 375: } 376: 377: void 378: anthy_do_resize_segment(struct anthy_context *ac, 379: int nth, int resize) 380: { 381: int i; 382: int index, len, sc; 383: 384: /* resizeが可能か検査する */ 385: if (nth >= ac->seg_list.nr_segments) { 386: return ; 387: } 388: index = get_nth_segment_index(ac, nth); 389: len = get_nth_segment_len(ac, nth); 390: if (index + len + resize > ac->str.len) { 391: return ; 392: } 393: if (len + resize < 1) { 394: return ; 395: } 396: 397: /* nth以降のseg_entを解放する */ 398: sc = ac->seg_list.nr_segments; 399: for (i = nth; i < sc; i++) { 400: pop_back_seg_ent(ac); 401: } 402: 403: /* resizeしたseg_borderをマークする */ 404: /* 現在のマークを消して新しいマークをつける */ 405: ac->split_info.ce[index+len].seg_border = 0; 406: ac->split_info.ce[ac->str.len].seg_border = 1; 407: for (i = index+len+resize+1; i < ac->str.len; i++) { 408: ac->split_info.ce[i].seg_border = 0; 409: } 410: ac->split_info.ce[index+len+resize].seg_border = 1; 411: for (i = index; i < ac->str.len; i++) { 412: ac->split_info.ce[i].best_mw = NULL; 413: } 414: 415: /* 解の候補を作成 */ 416: make_candidates(ac, index, index+len+resize, 0); 417: } 418: 419: /* 420: * n番めの文節を取得する、無い場合にはNULLを返す 421: */ 422: struct seg_ent * 423: anthy_get_nth_segment(struct segment_list *sl, int n) 424: { 425: int i; 426: struct seg_ent *se; 427: if (n >= sl->nr_segments || 428: n < 0) { 429: return NULL; 430: } 431: for (i = 0, se = sl->list_head.next; i < n; i++, se = se->next); 432: return se; 433: } 434: 435: int 436: anthy_do_set_prediction_str(struct anthy_context *ac, xstr* xs) 437: { 438: struct prediction_cache* prediction = &ac->prediction; 439: int nr_prediction; 440: 441: /* まず辞書セッションを解放 */ 442: if (ac->dic_session) { 443: anthy_dic_release_session(ac->dic_session); 444: ac->dic_session = NULL; 445: } 446: /* 予測された文字列の解放 */ 447: release_prediction(&ac->prediction); 448: 449: /* 辞書セッションの開始 */ 450: if (!ac->dic_session) { 451: ac->dic_session = anthy_dic_create_session(); 452: if (!ac->dic_session) { 453: return -1; 454: } 455: } 456: 457: prediction->str.str = (xchar*)malloc(sizeof(xchar*)*(xs->len+1)); 458: anthy_xstrcpy(&prediction->str, xs); 459: prediction->str.str[xs->len]=0; 460: 461: nr_prediction = anthy_traverse_record_for_prediction(xs, NULL); 462: prediction->nr_prediction = nr_prediction; 463: 464: if (nr_prediction) { 465: prediction->predictions = (struct prediction_t*)malloc(sizeof(struct prediction_t) * 466: nr_prediction); 467: anthy_traverse_record_for_prediction(xs, prediction->predictions); 468: } 469: return 0; 470: } 471: 472: static const char * 473: get_change_state(struct anthy_context *ac) 474: { 475: int resize = 0, cand_change = 0; 476: int i; 477: for (i = 0; i < ac->seg_list.nr_segments; i++) { 478: struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i); 479: if (ac->split_info.ce[s->from].initial_seg_len != s->len) { 480: resize = 1; 481: } 482: if (s->committed > 0) { 483: cand_change = 1; 484: } 485: } 486: /**/ 487: if (resize && cand_change) { 488: return "SC"; 489: } 490: if (resize) { 491: return "S"; 492: } 493: if (cand_change) { 494: return "C"; 495: } 496: return "-"; 497: } 498: 499: static void 500: write_history(FILE *fp, struct anthy_context *ac) 501: { 502: int i; 503: /* 読み */ 504: fprintf(fp, "|"); 505: for (i = 0; i < ac->seg_list.nr_segments; i++) { 506: struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i); 507: char *c = anthy_xstr_to_cstr(&s->str, ANTHY_EUC_JP_ENCODING); 508: fprintf(fp, "%s|", c); 509: free(c); 510: } 511: fprintf(fp, " |"); 512: /* 結果 */ 513: for (i = 0; i < ac->seg_list.nr_segments; i++) { 514: struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i); 515: char *c; 516: /**/ 517: if (s->committed < 0) { 518: fprintf(fp, "?|"); 519: continue ; 520: } 521: c = anthy_xstr_to_cstr(&s->cands[s->committed]->str, 522: ANTHY_EUC_JP_ENCODING); 523: fprintf(fp, "%s|", c); 524: free(c); 525: } 526: } 527: 528: void 529: anthy_save_history(const char *fn, struct anthy_context *ac) 530: { 531: FILE *fp; 532: struct stat st; 533: if (!fn) { 534: return ; 535: } 536: fp = fopen(fn, "a"); 537: if (!fp) { 538: return ; 539: } 540: if (stat(fn, &st) || 541: st.st_size > HISTORY_FILE_LIMIT) { 542: fclose(fp); 543: return ; 544: } 545: /**/ 546: fprintf(fp, "anthy-%s ", anthy_get_version_string()); 547: fprintf(fp, "%s ", get_change_state(ac)); 548: write_history(fp, ac); 549: fprintf(fp, "\n"); 550: fclose(fp); 551: /**/ 552: chmod(fn, S_IREAD | S_IWRITE); 553: } 554: 555: /** 候補を表示する */ 556: void 557: anthy_print_candidate(struct cand_ent *ce) 558: { 559: int mod = (ce->score % 1000); 560: int seg_score = 0; 561: 562: if (ce->mw) { 56コンテキストの初期化?