(linenum→info "unix/slp.c:2238")

anthy/9100e/mkworddic/mkudic.c

    1: /*
    2:  * 用例辞書を作る
    3:  *
    4:  * Copyright (C) 2003-2005 TABATA Yusuke
    5:  */
    6: #include <stdlib.h>
    7: #include <string.h>
    8: #include <stdio.h>
    9: 
   10: #include <anthy/matrix.h>
   11: #include "mkdic.h"
   12: 
   13: #define LINE_LEN 256
   14: 
   15: /* One use case: a pair of word ids, kept as a node of a singly linked list */
   16: struct use_case {
   17:   int id[2];              /* id[0] and id[1] are the x and y passed to commit_uc() */
   18:   struct use_case *next;  /* next use case in the list, NULL at the end */
   19: };
   20: 
   21: /* Use-case dictionary */
   22: struct uc_dict {
   23:   /* Use-case list: uc_head is a dummy head node, entries hang off uc_head.next */
   24:   struct use_case uc_head;
   25:   int nr_ucs;  /* number of use cases added by commit_uc() */
   26: };
   27: 
   28: /* 用例定義の行から単語のidを求める
   29:  */
   30: static int
   31: get_id_from_word_line(char *buf)
   32: {
   33:   char yomi[LINE_LEN];
   34:   char okuri[LINE_LEN];
   35:   char wt[LINE_LEN];
   36:   char kanji[LINE_LEN];
   37:   int res, id;
   38:   xstr *xs;
   39: 
   40:   res = sscanf(buf, "%s %s %s %s", yomi, okuri, wt, kanji);
   41:   if (res != 4) {
   42:     return -1;
   43:   }
   44:   xs = anthy_cstr_to_xstr(kanji, 0);
   45:   id = anthy_xstr_hash(xs);
   46:   anthy_free_xstr(xs);
   47:   return id;
   48: }
   49: 
   50: static void
   51: commit_uc(struct uc_dict *dict, int x, int y)
   52: {
   53:   struct use_case *uc;
   54:   if (x < 0 || y < 0) {
   55:     return ;
   56:   }
   57:   uc = malloc(sizeof(struct use_case));
   58:   uc->id[0] = x;
   59:   uc->id[1] = y;
   60:   /**/
   61:   uc->next = dict->uc_head.next;
   62:   dict->uc_head.next = uc;
   63:   dict->nr_ucs ++;
   64: }
   65: 
   66: /* 用例データベースを作る */
   67: struct uc_dict *
   68: create_uc_dict(void)
   69: {
   70:   struct uc_dict *dict = malloc(sizeof(struct uc_dict));
   71: 
   72:   dict->uc_head.next = NULL;
   73:   dict->nr_ucs = 0;
   74: 
   75:   return dict;
   76: }
   77: 
   78: /* 用例ファイルを読み込む */
   79: void
   80: read_uc_file(struct uc_dict *dict, const char *fn)
   81: {
   82:   char buf[LINE_LEN];
   83:   FILE *uc_file;
   84:   int off, base = 0, cur;
   85:   int line_number = 0;
   86: 
   87:   uc_file = fopen(fn, "r");
   88:   if (!uc_file) {
   89:     return ;
   90:   }
   91: 
   92:   /* off=0      : 最初の単語
   93:    * off=1,2..n : それと関係ある単語
   94:    */
   95:   off = 0;
   96:   while (fgets(buf, LINE_LEN, uc_file)) {
   97:     /**/
   98:     line_number ++;
   99:     /**/
  100:     if (buf[0] == '#') {
  101:       /* コメント */
  102:       continue;
  103:     }
  104:     if (buf[0] == '-') {
  105:       /* 区切り記号 */
  106:       off = 0;
  107:       continue;
  108:     }
  109:     cur = get_id_from_word_line(buf);
  110:     if (cur == -1) {
  111:       fprintf(stderr, "Invalid line(%d):%s\n", line_number, buf);
  112:     }
  113:     /**/
  114:     if (off == 0) {
  115:       /* 一つめの項目 */
  116:       base = cur;
  117:     } else {
  118:       /* 二つめ以降の項目 */
  119:       commit_uc(dict, cur, base);
  120:     }
  121:     off ++;
  122:   }
  123: }
  124: 
  125: /* 用例辞書をファイルに書き出す */
  126: void
  127: make_ucdict(FILE *uc_out, struct uc_dict *dict)
  128: {
  129:   struct use_case *uc;
  130:   struct sparse_matrix *sm;
  131:   struct matrix_image *mi;
  132:   int i;
  133:   /* 疎行列に詰め込む */
  134:   sm = anthy_sparse_matrix_new();
  135:   if (dict) {
  136:     for (uc = dict->uc_head.next; uc; uc = uc->next) {
  137:       anthy_sparse_matrix_set(sm, uc->id[0], uc->id[1], 1, NULL);
  138:     }
  139:   }
  140:   anthy_sparse_matrix_make_matrix(sm);
  141:   /* 疎行列のイメージを作成してファイルに書き出す */
  142:   mi = anthy_matrix_image_new(sm);
  143:   for (i = 0; i < mi->size; i++) {
  144:     write_nl(uc_out, mi->image[i]);
  145:   }
  146:   if (dict) {
  147:     printf("udic: %d use examples.\n", dict->nr_ucs);
  148:   } else {
  149:     printf("udic: no use examples.\n");
  150:   }
  151: 
  152: }
Syntax (Markdown)