(linenum→info "unix/slp.c:2238")

anthy/9100e/src-main/main.c

    1: /*
    2:  * Comments in this program are written in Japanese,
    3:  * because this program is a Japanese input method.
    4:  * (many Japanese grammatical terms will appear.)
    5:  *
    6:  * Kana-Kanji conversion engine Anthy.
    7:  * 仮名漢字変換エンジンAnthy(アンシー)
    8:  *
    9:  * Funded by IPA未踏ソフトウェア創造事業 2001 9/22
   10:  * Funded by IPA未踏ソフトウェア創造事業 2005
   11:  * Copyright (C) 2000-2007 TABATA Yusuke, UGAWA Tomoharu
   12:  * Copyright (C) 2004-2006 YOSHIDA Yuichi
   13:  * Copyright (C) 2000-2007 KMC(Kyoto University Micro Computer Club)
   14:  * Copyright (C) 2001-2002 TAKAI Kosuke, Nobuoka Takahiro
   15:  *
   16:  */
   17: /*
   18:   This library is free software; you can redistribute it and/or
   19:   modify it under the terms of the GNU Lesser General Public
   20:   License as published by the Free Software Foundation; either
   21:   version 2 of the License, or (at your option) any later version.
   22: 
   23:   This library is distributed in the hope that it will be useful,
   24:   but WITHOUT ANY WARRANTY; without even the implied warranty of
   25:   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   26:   Lesser General Public License for more details.
   27: 
   28:   You should have received a copy of the GNU Lesser General Public
   29:   License along with this library; if not, write to the Free Software
   30:   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
   31:  */
   32: /*
   33:  * Anthyの変換機能はライブラリとして構成されており、この
   34:  * ファイルにはライブラリの提供する関数(API)が記述されています。
   35:  *
   36:  * ライブラリの提供する関数は下記のようなものがあります
   37:  * (1)ライブラリ全体の初期化、終了、設定
   38:  * (2)変換コンテキストの作成、解放
   39:  * (3)変換コンテキストに対する文字列の設定、文節長の変更、候補の取得等
   40:  *
   41:  * インターフェイスに関しては doc/LIBを参照してください
   42:  * Anthyのコードを理解しようとする場合は
   43:  * doc/GLOSSARY で用語を把握することを勧めます
   44:  */
   45: #include <string.h>
   46: #include <stdlib.h>
   47: #include <stdio.h>
   48: 
   49: #include <anthy/dic.h>
   50: #include <anthy/splitter.h>
   51: #include <anthy/conf.h>
   52: #include <anthy/ordering.h>
   53: #include <anthy/logger.h>
   54: #include <anthy/record.h>
   55: #include <anthy/anthy.h>
   56: #include <anthy/record.h>
   57: #include <anthy/xchar.h> /* for KK_VU */
   58: #include "main.h"
   59: #include "config.h"
   60: 
   61: 
   62: /** Flag set to 1 at the end of anthy_init() and cleared again by anthy_quit(). */
   63: static int is_init_ok;
   64: /** Encoding handed to anthy_do_create_context() when a context is created. */
   65: static int default_encoding;
   66: /** Heap copy of the ANTHY_HISTORY_FILE environment value, or NULL when it is unset. */
   67: static char *history_file;
   68: 
   69: /** (API) 全体の初期化 */
   70: int
   71: anthy_init(void)
   72: {
   73:   char *hfn;
   74:   if (is_init_ok) {
   75:     /* 2度初期化しないように */
   76:     return 0;
   77:   }
   78: 
   79:   /* 各サブシステムを順に初期化する */
   80:   if (anthy_init_dic()) {
   81:     anthy_log(0, "Failed to initialize dictionary.\n");
   82:     return -1;
   83:   }
   84: 
   85:   if (anthy_init_splitter()) {
   86:     anthy_log(0, "Failed to init splitter.\n");
   87:     return -1;
   88:   }
   89:   anthy_init_contexts();
   90:   anthy_init_personality();
   91:   anthy_infosort_init();
   92:   anthy_relation_init();
   93: 
   94:   /**/
   95:   default_encoding = ANTHY_EUC_JP_ENCODING;
   96:   is_init_ok = 1;
   97:   history_file = NULL;
   98:   hfn = getenv("ANTHY_HISTORY_FILE");
   99:   if (hfn) {
  100:     history_file = strdup(hfn);
  101:   }
  102: 
  103:   /**/
  104:   return 0;
  105: }
  106: 
  107: /** (API) 全データの解放 */
  108: void
  109: anthy_quit(void)
  110: {
  111:   if (!is_init_ok) {
  112:     return ;
  113:   }
  114:   anthy_quit_contexts();
  115:   anthy_quit_personality();
  116:   anthy_quit_splitter();
  117:   /* 多くのデータ構造はここでallocatorによって解放される */
  118:   anthy_quit_dic();
  119: 
  120:   is_init_ok = 0;
  121:   /**/
  122:   if (history_file) {
  123:     free(history_file);
  124:   }
  125:   history_file = NULL;
  126: }
  127: 
  128: /** (API) Override a configuration item: passes var and val unchanged to anthy_do_conf_override(). */
  129: void
  130: anthy_conf_override(const char *var, const char *val)
  131: {
  132:   anthy_do_conf_override(var, val);
  133: }
  134: 
  135: /** (API) Select a personality: returns whatever anthy_do_set_personality(id) returns. */
  136: int
  137: anthy_set_personality(const char *id)
  138: {
  139:   return anthy_do_set_personality(id);
  140: }
  141: 
  142: /** (API) 変換contextの作成 */
  143: struct anthy_context *
  144: anthy_create_context(void)
  145: {
  146:   if (!is_init_ok) {
  147:     return 0;
  148:   }
  149:   return anthy_do_create_context(default_encoding);
  150: }
  151: 
  152: /** (API) Reset a conversion context: forwards ac to anthy_do_reset_context(). */
  153: void
  154: anthy_reset_context(struct anthy_context *ac)
  155: {
  156:   anthy_do_reset_context(ac);
  157: }
  158: 
  159: /** (API) Release a conversion context: forwards ac to anthy_do_release_context(). */
  160: void
  161: anthy_release_context(struct anthy_context *ac)
  162: {
  163:   anthy_do_release_context(ac);
  164: }
  165: 
  166: /** 
  167:  * 再変換が必要かどうかの判定
  168:  */
  169: static int
  170: need_reconvert(struct anthy_context *ac, xstr *xs)
  171: {
  172:   int i;
  173: 
  174:   if (ac->reconversion_mode == ANTHY_RECONVERT_ALWAYS) {
  175:     return 1;
  176:   }
  177:   if (ac->reconversion_mode == ANTHY_RECONVERT_DISABLE) {
  178:     return 0;
  179:   }
  180: 
  181:   for (i = 0; i < xs->len; ++i) {
  182:     xchar xc = xs->str[i];
  183:     int type = anthy_get_xchar_type(xc);
  184: 
  185:     /* これらの文字種の場合は逆変換する
  186:      * 「ヴ」はフロントエンドが平仮名モードの文字列として送ってくるので、
  187:      * 逆変換の対象とはしない
  188:      */
  189:     if (!(type & (XCT_HIRA | XCT_SYMBOL | XCT_NUM |
  190:                   XCT_WIDENUM | XCT_OPEN | XCT_CLOSE |
  191:                   XCT_ASCII)) &&
  192:         xc != KK_VU) {
  193:       return 1;
  194:     }
  195:   }
  196:   return 0;
  197: }
  198: 
  199: 
  200: /** (API) 変換文字列の設定 */
  201: int
  202: anthy_set_string(struct anthy_context *ac, const char *s)
  203: {
  204:   xstr *xs;
  205:   int retval;
  206: 
  207:   if (!ac) {
  208:     return -1;
  209:   }
  210: 
  211:   /*初期化*/
  212:   anthy_do_reset_context(ac);
  213: 
  214:   /* 辞書セッションの開始 */
  215:   if (!ac->dic_session) {
  216:     ac->dic_session = anthy_dic_create_session();
  217:     if (!ac->dic_session) {
  218:       return -1;
  219:     }
  220:   }
  221: 
  222:   anthy_dic_activate_session(ac->dic_session);
  223:   /* 変換を開始する前に個人辞書をreloadする */
  224:   anthy_reload_record();
  225: 
  226:   xs = anthy_cstr_to_xstr(s, ac->encoding);
  227:   /**/
  228:   if (!need_reconvert(ac, xs)) {
  229:     /* 普通に変換する */
  230:     retval = anthy_do_context_set_str(ac, xs, 0);
  231:   } else {
  232:     /* 漢字やカタカナが混じっていたら再変換してみる */
  233:     struct anthy_conv_stat stat;
  234:     struct seg_ent *seg;
  235:     int i;
  236:     xstr* hira_xs;
  237:     /* 与えられた文字列に変換をかける */
  238:     retval = anthy_do_context_set_str(ac, xs, 1);
  239: 
  240:     /* 各文節の第一候補を取得して平仮名列を得る */
  241:     anthy_get_stat(ac, &stat);
  242:     hira_xs = NULL;
  243:     for (i = 0; i < stat.nr_segment; ++i) {
  244:       seg = anthy_get_nth_segment(&ac->seg_list, i);
  245:       hira_xs = anthy_xstrcat(hira_xs, &seg->cands[0]->str);
  246:     }
  247:     /* 改めて変換を行なう */
  248:     anthy_release_segment_list(ac);
  249:     retval = anthy_do_context_set_str(ac, hira_xs, 0);
  250:     anthy_free_xstr(hira_xs);
  251:   }
  252: 
  253:   anthy_free_xstr(xs);
  254:   return retval;
  255: }
  256: 
  257: /** (API) 文節長の変更 */
  258: void
  259: anthy_resize_segment(struct anthy_context *ac, int nth, int resize)
  260: {
  261:   anthy_dic_activate_session(ac->dic_session);
  262:   anthy_do_resize_segment(ac, nth, resize);
  263: }
  264: 
  265: /** (API) 変換の状態の取得 */
  266: int
  267: anthy_get_stat(struct anthy_context *ac, struct anthy_conv_stat *s)
  268: {
  269:   s->nr_segment = ac->seg_list.nr_segments;
  270:   return 0;
  271: }
  272: 
  273: /** (API) 文節の状態の取得 */
  274: int
  275: anthy_get_segment_stat(struct anthy_context *ac, int n,
  276:                        struct anthy_segment_stat *s)
  277: {
  278:   struct seg_ent *seg;
  279:   seg = anthy_get_nth_segment(&ac->seg_list, n);
  280:   if (seg) {
  281:     s->nr_candidate = seg->nr_cands;
  282:     s->seg_len = seg->str.len;
  283:     return 0;
  284:   }
  285:   return -1;
  286: }
  287: 
  288: static int
  289: get_special_candidate_index(int nth, struct seg_ent *seg)
  290: {
  291:   int i;
  292:   int mask = XCT_NONE;
  293:   if (nth >= 0) {
  294:     return nth;
  295:   }
  296:   if (nth == NTH_UNCONVERTED_CANDIDATE ||
  297:       nth == NTH_HALFKANA_CANDIDATE) {
  298:     return nth;
  299:   }
  300:   if (nth == NTH_KATAKANA_CANDIDATE) {
  301:     mask = XCT_KATA;
  302:   } else if (nth == NTH_HIRAGANA_CANDIDATE) {
  303:     mask = XCT_HIRA;
  304:   }
  305:   for (i = 0; i < seg->nr_cands; i++) {
  306:     if (anthy_get_xstr_type(&seg->cands[i]->str) & mask) {
  307:       return i;
  308:     }
  309:   }
  310:   return NTH_UNCONVERTED_CANDIDATE;
  311: }
  312: 
  313: /** (API) 文節の取得 */
  314: int
  315: anthy_get_segment(struct anthy_context *ac, int nth_seg,
  316:                   int nth_cand, char *buf, int buflen)
  317: {
  318:   struct seg_ent *seg;
  319:   char *p;
  320:   int len;
  321: 
  322:   /* 文節を取り出す */
  323:   if (nth_seg < 0 || nth_seg >= ac->seg_list.nr_segments) {
  324:     return -1;
  325:   }
  326:   seg = anthy_get_nth_segment(&ac->seg_list, nth_seg);
  327: 
  328:   /* 文節から候補を取り出す */
  329:   p = NULL;
  330:   if (nth_cand < 0) {
  331:     nth_cand = get_special_candidate_index(nth_cand, seg);
  332:   }
  333:   if (nth_cand == NTH_HALFKANA_CANDIDATE) {
  334:     xstr *xs = anthy_xstr_hira_to_half_kata(&seg->str);
  335:     p = anthy_xstr_to_cstr(xs, ac->encoding);
  336:     anthy_free_xstr(xs);
  337:   } else if (nth_cand == NTH_UNCONVERTED_CANDIDATE) {
  338:     /* 変換前の文字列を取得する */
  339:     p = anthy_xstr_to_cstr(&seg->str, ac->encoding);
  340:   } else if (nth_cand >= 0 && nth_cand < seg->nr_cands) {
  341:     p = anthy_xstr_to_cstr(&seg->cands[nth_cand]->str, ac->encoding);
  342:   }
  343:   if (!p) {
  344:     return -1;
  345:   }
  346: 
  347:   /* バッファに書き込む */
  348:   len = strlen(p);
  349:   if (!buf) {
  350:     free(p);
  351:     return len;
  352:   }
  353:   if (len + 1 > buflen) {
  354:     /* バッファが足りません */
  355:     free(p);
  356:     return -1;
  357:   }
  358:   strcpy(buf, p);
  359:   free(p);
  360:   return len;
  361: }
  362: 
  363: /* すべての文節がコミットされたかcheckする */
  364: static int
  365: commit_all_segment_p(struct anthy_context *ac)
  366: {
  367:   int i;
  368:   struct seg_ent *se;
  369:   for (i = 0; i < ac->seg_list.nr_segments; i++) {
  370:     se = anthy_get_nth_segment(&ac->seg_list, i);
  371:     if (se->committed < 0) {
  372:       return 0;
  373:     }
  374:   }
  375:   return 1;
  376: }
  377: 
  378: /** (API) 文節の確定 */
  379: int
  380: anthy_commit_segment(struct anthy_context *ac, int s, int c)
  381: {
  382:   struct seg_ent *seg;
  383:   if (!ac->str.str) {
  384:     return -1;
  385:   }
  386:   if (s < 0 || s >= ac->seg_list.nr_segments) {
  387:     return -1;
  388:   }
  389:   if (commit_all_segment_p(ac)) {
  390:     /* すでに全てのセグメントがコミットされている */
  391:     return -1;
  392:   }
  393: 
  394:   anthy_dic_activate_session(ac->dic_session);
  395:   seg = anthy_get_nth_segment(&ac->seg_list, s);
  396:   if (c < 0) {
  397:     c = get_special_candidate_index(c, seg);
  398:   }
  399:   if (c == NTH_UNCONVERTED_CANDIDATE) {
  400:     /*
  401:      * 変換前の文字列がコミットされたので,それに対応する候補の番号を探す
  402:      */
  403:     int i;
  404:     for (i = 0; i < seg->nr_cands; i++) {
  405:       if (!anthy_xstrcmp(&seg->str, &seg->cands[i]->str)) {
  406:         c = i;
  407:       }
  408:     }
  409:   }
  410:   if (c < 0 || c >= seg->nr_cands) {
  411:     return -1;
  412:   }
  413:   seg->committed = c;
  414: 
  415:   if (commit_all_segment_p(ac)) {
  416:     /* 今、すべてのセグメントがコミットされた */
  417:     anthy_proc_commit(&ac->seg_list, &ac->split_info);
  418:     /**/
  419:     anthy_save_history(history_file, ac);
  420:   }
  421:   return 0;
  422: }
  423: 
  424: /** (API) 予測してほしい文字列の設定 */
  425: int
  426: anthy_set_prediction_string(struct anthy_context *ac, const char* s)
  427: {
  428:   int retval;
  429:   xstr *xs;
  430: 
  431:   anthy_dic_activate_session(ac->dic_session);
  432:   /* 予測を開始する前に個人辞書をreloadする */
  433:   anthy_reload_record();
  434: 
  435: 
  436:   xs = anthy_cstr_to_xstr(s, ac->encoding);
  437: 
  438:   retval = anthy_do_set_prediction_str(ac, xs);
  439: 
  440:   anthy_free_xstr(xs);
  441: 
  442:   return retval;
  443: }
  444: 
  445: /** (API) 予測変換の状態の取得 */
  446: int 
  447: anthy_get_prediction_stat(struct anthy_context *ac, struct anthy_prediction_stat * ps)
  448: {
  449:   ps->nr_prediction = ac->prediction.nr_prediction;
  450:   return 0;
  451: }
  452: 
  453: /** (API) 予測変換の候補の取得 */
  454: int
  455: anthy_get_prediction(struct anthy_context *ac, int nth, char* buf, int buflen)
  456: {
  457:   struct prediction_cache* prediction = &ac->prediction;
  458:   int nr_prediction = prediction->nr_prediction;
  459:   char* p;
  460:   int len;
  461: 
  462:   if (nth < 0 || nr_prediction <= nth) {
  463:     return -1;
  464:   }
  465: 
  466:   p = anthy_xstr_to_cstr(prediction->predictions[nth].str, ac->encoding);
  467: 
  468:   /* バッファに書き込む */
  469:   len = strlen(p);
  470:   if (!buf) {
  471:     free(p);
  472:     return len;
  473:   }
  474:   if (len + 1 > buflen) {
  475:     free(p);
  476:     return -1;
  477:   } else {
  478:     strcpy(buf, p);
  479:     free(p);
  480:     return len;
  481:   }
  482: }
  483: 
  484: /** (API) 予測の結果を確定する
  485:  */
  486: int
  487: anthy_commit_prediction(struct anthy_context *ac, int nth)
  488: {
  489:   struct prediction_cache* pc = &ac->prediction;
  490:   if (nth < 0 || nth >= pc->nr_prediction) {
  491:     return -1;
  492:   }
  493:   anthy_do_commit_prediction(pc->predictions[nth].src_str,
  494:                              pc->predictions[nth].str);
  495:   return 0;
  496: }
  497: 
  498: /** (API) Development aid: hands ac to anthy_do_print_context() with the file-level default_encoding (not ac->encoding). */
  499: void
  500: anthy_print_context(struct anthy_context *ac)
  501: {
  502:   anthy_do_print_context(ac, default_encoding);
  503: }
  504: 
  /**
   * (API) Return a string naming the version of the Anthy library.
   *
   * This is a function rather than an exported variable because exporting
   * variables from a shared library is undesirable.
   *
   * @return the VERSION macro when it is defined at build time,
   *         "(unknown)" otherwise.
   */
  const char *
  anthy_get_version_string (void)
  {
  #ifndef VERSION
    /* Fallback for builds that do not define VERSION. */
    return "(unknown)";
  #else
    return VERSION;
  #endif
  }
  517: 
  518: /** (API) */
  519: int
  520: anthy_context_set_encoding(struct anthy_context *ac, int encoding)
  521: {
  522:   if (!ac) {
  523:     return ANTHY_EUC_JP_ENCODING;
  524:   }
  525:   if (encoding == ANTHY_UTF8_ENCODING ||
  526:       encoding == ANTHY_EUC_JP_ENCODING) {
  527:     ac->encoding = encoding;
  528:   }
  529:   return ac->encoding;
  530: }
  531: 
  532: /** (API) */
  533: int
  534: anthy_set_reconversion_mode(anthy_context_t ac, int mode)
  535: {
  536:   if (!ac) {
  537:     return ANTHY_RECONVERT_AUTO;
  538:   }
  539:   if (mode == ANTHY_RECONVERT_AUTO ||
  540:       mode == ANTHY_RECONVERT_DISABLE ||
  541:       mode == ANTHY_RECONVERT_ALWAYS) {
  542:     ac->reconversion_mode = mode;
  543:   }
  544:   return ac->reconversion_mode;
  545: }
1
Syntax (Markdown)