(linenum→info "unix/slp.c:2238")

anthy/9100e/src-worddic/priv_dic.c

    1: /*
    2:  * 個人辞書を扱うためのコード
    3:  *
    4:  * ユーザが明示的に登録した単語だけでなく、
    5:  * 未知語を自動的に学習して管理するAPIも持つ。
    6:  *
    7:  * Copyright (C) 2000-2007 TABATA Yusuke
    8:  */
    9: /*
   10:   This library is free software; you can redistribute it and/or
   11:   modify it under the terms of the GNU Lesser General Public
   12:   License as published by the Free Software Foundation; either
   13:   version 2 of the License, or (at your option) any later version.
   14: 
   15:   This library is distributed in the hope that it will be useful,
   16:   but WITHOUT ANY WARRANTY; without even the implied warranty of
   17:   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   18:   Lesser General Public License for more details.
   19: 
   20:   You should have received a copy of the GNU Lesser General Public
   21:   License along with this library; if not, write to the Free Software
   22:   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
   23:  */
   24: #include <sys/types.h>
   25: #include <sys/stat.h>
   26: #include <dirent.h>
   27: #include <fcntl.h>
   28: #include <unistd.h>
   29: 
   30: #include <stdlib.h>
   31: #include <string.h>
   32: #include <stdio.h>
   33: 
   34: #include <anthy/anthy.h>
   35: #include <anthy/alloc.h>
   36: #include <anthy/dic.h>
   37: #include <anthy/record.h>
   38: #include <anthy/dicutil.h>
   39: #include <anthy/conf.h>
   40: #include <anthy/logger.h>
   41: #include <anthy/texttrie.h>
   42: #include <anthy/textdict.h>
   43: #include <anthy/word_dic.h>
   44: #include "dic_main.h"
   45: #include "dic_ent.h"
   46: 
   47: /* 個人辞書 */
   48: struct text_trie *anthy_private_tt_dic;
   49: struct textdict *anthy_private_text_dic;
   50: static struct textdict *anthy_imported_text_dic;
   51: static char *imported_dic_dir;
   52: /* ロック用の変数 */
   53: static char *lock_fn;
   54: static int lock_depth;
   55: static int lock_fd;
   56: 
   57: #define MAX_DICT_SIZE 100000000
   58: 
   59: /* 個人辞書のディレクトリの有無を確認する */
   60: void
   61: anthy_check_user_dir(void)
   62: {
   63:   const char *hd;
   64:   char *dn;
   65:   struct stat st;
   66:   hd = anthy_conf_get_str("HOME");
   67:   dn = alloca(strlen(hd) + 10);
   68:   sprintf(dn, "%s/.anthy", hd);
   69:   if (stat(dn, &st) || !S_ISDIR(st.st_mode)) {
   70:     int r;
   71:     /*fprintf(stderr, "Anthy: Failed to open anthy directory(%s).\n", dn);*/
   72:     r = mkdir(dn, S_IRWXU);
   73:     if (r == -1){
   74:       anthy_log(0, "Failed to create profile directory\n");
   75:       return ;
   76:     }
   77:     /*fprintf(stderr, "Anthy: Created\n");*/
   78:     r = chmod(dn, S_IRUSR | S_IWUSR | S_IXUSR);
   79:     if (r == -1) {
   80:       anthy_log(0, "But failed to change permission.\n");
   81:     }
   82:   }
   83: }
   84: 
   85: static void
   86: init_lock_fn(const char *home, const char *id)
   87: {
   88:   lock_fn = malloc(strlen(home) + strlen(id) + 40);
   89:   sprintf(lock_fn, "%s/.anthy/lock-file_%s", home, id);
   90: }
   91: 
   92: static struct text_trie *
   93: open_tt_dic(const char *home, const char *id)
   94: {
   95:   struct text_trie *tt;
   96:   char *buf = malloc(strlen(home) + strlen(id) + 40);
   97:   sprintf(buf, "%s/.anthy/private_dict_%s.tt", home, id);
   98:   tt = anthy_trie_open(buf, 0);
   99:   free(buf);
  100:   return tt;
  101: }
  102: 
  103: static struct textdict *
  104: open_textdic(const char *home, const char *name, const char *id)
  105: {
  106:   char *fn = malloc(strlen(home) + strlen(name) + strlen(id) + 10);
  107:   struct textdict *td;
  108:   sprintf(fn, "%s/.anthy/%s%s", home, name, id);
  109:   td = anthy_textdict_open(fn, 0);
  110:   free(fn);
  111:   return td;
  112: }
  113: 
  114: void
  115: anthy_priv_dic_lock(void)
  116: {
  117:   struct flock lck;
  118:   lock_depth ++;
  119:   if (lock_depth > 1) {
  120:     return ;
  121:   }
  122:   if (!lock_fn) {
  123:     /* 初期化をミスってる */
  124:     lock_fd = -1;
  125:     return ;
  126:   }
  127: 
  128:   /* ファイルロックの方法は多数あるが、この方法はcygwinでも動くので採用した */
  129:   lock_fd = open(lock_fn, O_CREAT|O_RDWR, S_IREAD|S_IWRITE);
  130:   if (lock_fd == -1) {
  131:     return ;
  132:   }
  133: 
  134:   lck.l_type = F_WRLCK;
  135:   lck.l_whence = (short) 0;
  136:   lck.l_start = (off_t) 0;
  137:   lck.l_len = (off_t) 1;
  138:   if (fcntl(lock_fd, F_SETLKW, &lck) == -1) {
  139:     close(lock_fd);
  140:     lock_fd = -1;
  141:   }
  142: }
  143: 
  144: void
  145: anthy_priv_dic_unlock(void)
  146: {
  147:   lock_depth --;
  148:   if (lock_depth > 0) {
  149:     return ;
  150:   }
  151: 
  152:   if (lock_fd != -1) {
  153:     close(lock_fd);
  154:     lock_fd = -1;
  155:   }
  156: }
  157: 
  158: void
  159: anthy_priv_dic_update(void)
  160: {
  161:   if (!anthy_private_tt_dic) {
  162:     return ;
  163:   }
  164: 
  165:   anthy_trie_update_mapping(anthy_private_tt_dic);
  166: }
  167: 
  168: /* seq_entに追加する */
  169: static void
  170: add_to_seq_ent(const char *line, int encoding, struct seq_ent *seq)
  171: {
  172:   struct word_line wl;
  173:   wtype_t wt;
  174:   xstr *xs;
  175:   /* */
  176:   if (anthy_parse_word_line(line, &wl)) {
  177:     return ;
  178:   }
  179:   xs = anthy_cstr_to_xstr(wl.word, encoding);
  180:   anthy_type_to_wtype(wl.wt, &wt);
  181:   anthy_mem_dic_push_back_dic_ent(seq, 0, xs, wt,
  182:                                   NULL, wl.freq, 0);
  183:   anthy_free_xstr(xs);
  184: }
  185: 
  186: /* texttrieに登録されているかをチェックし、
  187:  * 登録されていればseq_entに追加する
  188:  */
  189: static void
  190: copy_words_from_tt(struct seq_ent *seq, xstr *xs,
  191:                    int encoding, const char *prefix)
  192: {
  193:   char *key, *v;
  194:   int key_len;
  195:   char *key_buf;
  196:   int prefix_len = strlen(prefix);
  197:   /**/
  198:   if (!anthy_private_tt_dic) {
  199:     return ;
  200:   }
  201:   key = anthy_xstr_to_cstr(xs, encoding);
  202:   key_len = strlen(key);
  203:   key_buf = malloc(key_len + 12);
  204:   /* 辞書中には各単語が「見出し XXXX」(XXXXはランダムな文字列)を
  205:    * キーとして保存されているので列挙する
  206:    */
  207:   sprintf(key_buf, "%s%s ", prefix, key);
  208:   do {
  209:     if (strncmp(&key_buf[2], key, key_len) ||
  210:         strncmp(&key_buf[0], prefix, prefix_len) ||
  211:         key_buf[key_len+2] != ' ') {
  212:       /* 「見出し 」で始まっていないので対象外 */
  213:       break;
  214:     }
  215:     /* 単語を読み出して登録 */
  216:     v = anthy_trie_find(anthy_private_tt_dic, key_buf);
  217:     if (v) {
  218:       add_to_seq_ent(v, encoding, seq);
  219:     }
  220:     free(v);
  221:     /**/
  222:   } while (anthy_trie_find_next_key(anthy_private_tt_dic,
  223:                                     key_buf, key_len + 8));
  224:   free(key);
  225:   free(key_buf);
  226: }
  227: 
  228: void
  229: anthy_copy_words_from_private_dic(struct seq_ent *seq,
  230:                                   xstr *xs, int is_reverse)
  231: {
  232:   if (is_reverse) {
  233:     return ;
  234:   }
  235:   /* 個人辞書から取ってくる */
  236:   copy_words_from_tt(seq, xs, ANTHY_EUC_JP_ENCODING, "  ");
  237:   copy_words_from_tt(seq, xs, ANTHY_UTF8_ENCODING, " p");
  238:   /**/
  239:   if (!anthy_select_section("UNKNOWN_WORD", 0) &&
  240:       !anthy_select_row(xs, 0)) {
  241:     wtype_t wt;
  242:     xstr *word_xs;
  243:     anthy_type_to_wtype("#T35", &wt);
  244:     word_xs = anthy_get_nth_xstr(0);
  245:     anthy_mem_dic_push_back_dic_ent(seq, 0, word_xs, wt, NULL, 10, 0);
  246:   }
  247: }
  248: 
  249: int
  250: anthy_parse_word_line(const char *line, struct word_line *res)
  251: {
  252:   int i;
  253:   const char *buf = line;
  254:   /* default values */
  255:   res->wt[0] = 0;
  256:   res->freq = 1;
  257:   res->word = NULL;
  258:   /* 品詞と頻度をparse */
  259:   for (i = 0; i < 9 && *buf && *buf != '*' && *buf != ' '; buf++, i++) {
  260:     res->wt[i] = *buf;
  261:   }
  262:   res->wt[i] = 0;
  263:   if (*buf == '*') {
  264:     buf ++;
  265:     sscanf(buf, "%d", &res->freq);
  266:     buf = strchr(buf, ' ');
  267:   } else {
  268:     res->freq = 1;
  269:   }
  270:   if (!buf || !(*buf)) {
  271:     res->word = "";
  272:     return -1;
  273:   }
  274:   buf++;
  275:   /* 単語 */
  276:   res->word = buf;
  277:   return 0;
  278: }
  279: 
  280: void
  281: anthy_ask_scan(void (*request_scan)(struct textdict *, void *),
  282:                void *arg)
  283: {
  284:   DIR *dir;
  285:   struct dirent *de;
  286:   int size = 0;
  287:   request_scan(anthy_private_text_dic, arg);
  288:   request_scan(anthy_imported_text_dic, arg);
  289:   dir = opendir(imported_dic_dir);
  290:   if (!dir) {
  291:     return ;
  292:   }
  293:   while ((de = readdir(dir))) {
  294:     struct stat st_buf;
  295:     struct textdict *td;
  296:     char *fn = malloc(strlen(imported_dic_dir) +
  297:                       strlen(de->d_name) + 3);
  298:     if (!fn) {
  299:       break;
  300:     }
  301:     sprintf(fn, "%s/%s", imported_dic_dir, de->d_name);
  302:     if (stat(fn, &st_buf)) {
  303:       free(fn);
  304:       continue;
  305:     }
  306:     if (!S_ISREG(st_buf.st_mode)) {
  307:       free(fn);
  308:       continue;
  309:     }
  310:     size += st_buf.st_size;
  311:     if (size > MAX_DICT_SIZE) {
  312:       free(fn);
  313:       break;
  314:     }
  315:     td = anthy_textdict_open(fn, 0);
  316:     request_scan(td, arg);
  317:     anthy_textdict_close(td);
  318:     free(fn);
  319:   }
  320:   closedir(dir);
  321: }
  322: 
  323: static void
  324: add_unknown_word(xstr *yomi, xstr *word)
  325: {
  326:   /* recordに追加 */
  327:   if (anthy_select_section("UNKNOWN_WORD", 1)) {
  328:     return ;
  329:   }
  330:   if (!anthy_select_row(yomi, 0)) {
  331:     anthy_mark_row_used();
  332:   }
  333:   if (anthy_select_row(yomi, 1)) {
  334:     return ;
  335:   }
  336:   anthy_set_nth_xstr(0, word);
  337: }
  338: 
  339: void
  340: anthy_add_unknown_word(xstr *yomi, xstr *word)
  341: {
  342:   if (!(anthy_get_xstr_type(word) & XCT_KATA) &&
  343:       !(anthy_get_xstr_type(word) & XCT_HIRA)) {
  344:     return ;
  345:   }
  346:   if (yomi->len < 4 || yomi->len > 30) {
  347:     return ;
  348:   }
  349:   /**/
  350:   add_unknown_word(yomi, word);
  351: }
  352: 
  353: void
  354: anthy_forget_unused_unknown_word(xstr *xs)
  355: {
  356:   char key_buf[128];
  357:   char *v;
  358: 
  359:   if (!anthy_private_tt_dic) {
  360:     return ;
  361:   }
  362: 
  363:   v = anthy_xstr_to_cstr(xs, ANTHY_UTF8_ENCODING);
  364:   sprintf(key_buf, " U%s 0", v);
  365:   free(v);
  366:   anthy_trie_delete(anthy_private_tt_dic, key_buf);
  367: 
  368:   /* recordに記録された物を消す */
  369:   if (anthy_select_section("UNKNOWN_WORD", 0)) {
  370:     return ;
  371:   }
  372:   if (!anthy_select_row(xs, 0)) {
  373:     anthy_release_row();
  374:   }
  375: }
  376: 
  377: void
  378: anthy_init_private_dic(const char *id)
  379: {
  380:   const char *home = anthy_conf_get_str("HOME");
  381:   if (anthy_private_tt_dic) {
  382:     anthy_trie_close(anthy_private_tt_dic);
  383:   }
  384:   /**/
  385:   anthy_textdict_close(anthy_private_text_dic);
  386:   anthy_textdict_close(anthy_imported_text_dic);
  387:   /**/
  388:   if (lock_fn) {
  389:     free(lock_fn);
  390:   }
  391:   init_lock_fn(home, id);
  392:   anthy_private_tt_dic = open_tt_dic(home, id);
  393:   /**/
  394:   anthy_private_text_dic = open_textdic(home, "private_words_", id);
  395:   anthy_imported_text_dic = open_textdic(home, "imported_words_", id);
  396:   imported_dic_dir = malloc(strlen(home) + strlen(id) + 30);
  397:   sprintf(imported_dic_dir, "%s/.anthy/imported_words_%s.d/", home, id);
  398: }
  399: 
  400: void
  401: anthy_release_private_dic(void)
  402: {
  403:   if (anthy_private_tt_dic) {
  404:     anthy_trie_close(anthy_private_tt_dic);
  405:     anthy_private_tt_dic = NULL;
  406:   }
  407:   /**/
  408:   anthy_textdict_close(anthy_private_text_dic);
  409:   anthy_textdict_close(anthy_imported_text_dic);
  410:   free(imported_dic_dir);
  411:   anthy_private_text_dic = NULL;
  412:   anthy_imported_text_dic = NULL;
  413:   imported_dic_dir = NULL;
  414:   /**/
  415:   if (lock_depth > 0) {
  416:     /* not sane situation */
  417:     lock_depth = 0;
  418:     if (lock_fn) {
  419:       unlink(lock_fn);
  420:     }
  421:   }
  422:   /**/
  423:   free(lock_fn);
  424:   lock_fn = NULL;
  425: }
Syntax (Markdown)