(linenum→info "unix/slp.c:2238")

anthy/9100e/mkanthydic/mkfiledic.c

    1: /*
    2:  * ファイルをまとめて辞書ファイルを生成する
    3:  *
    4:  * デフォルトではひとつ上のディレクトリ「..」に各ファイルの
    5:  * パス名を付けるが、このコマンドに対する -p オプションで
    6:  * 変更することができる。
    7:  *
    8:  * entry_num個のファイルに対して
    9:  *  0: entry_num ファイルの個数
   10:  *  1: 各ファイルの情報
   11:  *    n * 3    : name_offset
   12:  *    n * 3 + 1: strlen(key)
   13:  *    n * 3 + 2: contents_offset
   14:  *  [name_of_section]*entry_num
   15:  *   : 各ファイルの名前
   16:  *  [file]*entry_num
   17:  *   : 各ファイルの内容
   18:  *
   19:  * Copyright (C) 2005-2006 YOSHIDA Yuichi
   20:  * Copyright (C) 2006-2007 TABATA Yusuke
   21:  *
   22:  */
   23: /*
   24:   This library is free software; you can redistribute it and/or
   25:   modify it under the terms of the GNU Lesser General Public
   26:   License as published by the Free Software Foundation; either
   27:   version 2 of the License, or (at your option) any later version.
   28: 
   29:   This library is distributed in the hope that it will be useful,
   30:   but WITHOUT ANY WARRANTY; without even the implied warranty of
   31:   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   32:   Lesser General Public License for more details.
   33: 
   34:   You should have received a copy of the GNU Lesser General Public
   35:   License along with this library; if not, write to the Free Software
   36:   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
   37:  */
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/stat.h>
   42: 
   43: #include <anthy/xstr.h>
   44: #include <anthy/diclib.h>
   45: 
   46: #define SECTION_ALIGNMENT 64
   47: #define DIC_NAME "anthy.dic"
   48: 
   49: struct header_entry {
   50:   const char* key;
   51:   const char* file_name;
   52: };
   53: 
   54: static void
   55: write_nl(FILE* fp, int i)
   56: {
   57:   i = anthy_dic_htonl(i);
   58:   fwrite(&i, sizeof(int), 1, fp);
   59: }
   60: 
   61: 
   62: /** ファイルのサイズを取得する */
   63: static int
   64: get_file_size(const char* fn)
   65: {
   66:   struct stat st;
   67:   if (stat(fn, &st) < 0) {
   68:     return -1;
   69:   }
   70:   return (st.st_size + SECTION_ALIGNMENT - 1) & (-SECTION_ALIGNMENT);
   71: }
   72: 
   73: static char *
   74: get_file_name(const char *prefix, struct header_entry* entry)
   75: {
   76:   char *fn = malloc(strlen(prefix) + strlen(entry->file_name) + 4);
   77:   sprintf(fn, "%s/%s", prefix, entry->file_name);
   78:   return fn;
   79: }
   80: 
   81: static int
   82: write_header(FILE* fp, const char *prefix,
   83:              int entry_num, struct header_entry* entries)
   84: {
   85:   int i;
   86:   int name_offset;
   87:   int contents_offset;
   88: 
   89:   name_offset = sizeof(int) * (1 + entry_num * 3);
   90:   contents_offset = name_offset;
   91: 
   92:   for (i = 0; i < entry_num; ++i) {
   93:     contents_offset += strlen(entries[i].key);
   94:   }
   95:   contents_offset =
   96:     (contents_offset + SECTION_ALIGNMENT - 1) & (-SECTION_ALIGNMENT);
   97: 
   98:   /* ファイルの数 */
   99:   write_nl(fp, entry_num);
  100: 
  101:   /* 各ファイルの場所を出力する */
  102:   for (i = 0; i < entry_num; ++i) {
  103:     char *fn = get_file_name(prefix, &entries[i]);
  104:     int file_size = get_file_size(fn);
  105:     if (file_size == -1) {
  106:       fprintf(stderr, "failed to get file size of (%s).\n",
  107:               fn);
  108:       free(fn);
  109:       return -1;
  110:     }
  111:     free(fn);
  112:     /**/
  113:     write_nl(fp, name_offset);
  114:     write_nl(fp, strlen(entries[i].key));
  115:     write_nl(fp, contents_offset);
  116:     /**/
  117:     name_offset += strlen(entries[i].key);
  118:     contents_offset += file_size;
  119:   }
  120: 
  121:   /* 各ファイルの名前を出力する */
  122:   for (i = 0; i < entry_num; ++i) {
  123:     fprintf(fp, "%s", entries[i].key);
  124:   }
  125:   return 0;
  126: }
  127: 
  128: 
  129: 
  130: static void
  131: copy_file(FILE *in, FILE *out)
  132: {
  133:   int i;
  134:   size_t nread;
  135:   char buf[BUFSIZ];
  136: 
  137:   /* Pad OUT to the next aligned offset.  */
  138:   for (i = ftell (out); i & (SECTION_ALIGNMENT - 1); i++) {
  139:     fputc (0, out);
  140:   }
  141: 
  142:   /* Copy the contents.  */
  143:   rewind(in);
  144:   while ((nread = fread (buf, 1, sizeof buf, in)) > 0) {
  145:     if (fwrite (buf, 1, nread, out) < nread) {
  146:       exit (1);
  147:     }
  148:   }
  149: }
  150: 
  151: static void
  152: write_contents(FILE* fp, const char *prefix,
  153:                int entry_num, struct header_entry* entries)
  154: {
  155:   int i;
  156:   for (i = 0; i < entry_num; ++i) {
  157:     FILE* in_fp;
  158:     char *fn = get_file_name(prefix, &entries[i]);
  159: 
  160:     in_fp = fopen(fn, "r");
  161:     if (in_fp == NULL) {
  162:       printf("failed to open %s\n", fn);
  163:       free(fn);
  164:       break;
  165:     }
  166:     printf("  copying %s (%s)\n", fn, entries[i].key);
  167:     free(fn);
  168:     copy_file(in_fp, fp);
  169:     fclose(in_fp);
  170:   }
  171: }
  172: 
  173: 
  174: static void
  175: create_file_dic(const char* fn, const char *prefix,
  176:                 int entry_num, struct header_entry* entries)
  177: {
  178:   FILE* fp = fopen(fn, "w");
  179:   int res;
  180:   if (!fp) {
  181:     fprintf(stderr, "failed to open file dictionary file (%s).\n", fn);
  182:     exit(1);
  183:   }
  184:   /* ヘッダを書き出す */
  185:   res = write_header(fp, prefix, entry_num, entries);
  186:   if (res) {
  187:     exit(1);
  188:   }
  189: 
  190:   /* ファイルの中身を書き出す */
  191:   write_contents(fp, prefix, entry_num, entries);
  192:   fclose(fp);
  193: }
  194: 
  195: 
  196: int
  197: main(int argc, char* argv[])
  198: {
  199:   int i;
  200:   const char *prefix = "..";
  201:   const char *prev_arg = "";
  202: 
  203:   struct header_entry entries[] = {
  204:     {"word_dic", "/mkworddic/anthy.wdic"},
  205:     {"dep_dic", "/depgraph/anthy.dep"},
  206:     {"trans_info", "/calctrans/anthy.trans_info"},
  207:     {"cand_info", "/calctrans/anthy.cand_info"},
  208:     {"weak_words", "/calctrans/anthy.weak_words"},
  209:     {"corpus_bucket", "/calctrans/anthy.corpus_bucket"},
  210:     {"corpus_array", "/calctrans/anthy.corpus_array"},
  211:   };
  212: 
  213:   for (i = 1; i < argc; i++) {
  214:     if (!strcmp("-p", prev_arg)) {
  215:       prefix = argv[i];
  216:     }
  217:     /**/
  218:     prev_arg = argv[i];
  219:   }
  220:   printf("file name prefix=[%s] you can change this by -p option.\n", prefix);
  221: 
  222:   create_file_dic(DIC_NAME, prefix,
  223:                   sizeof(entries)/sizeof(struct header_entry),
  224:                   entries);
  225: 
  226:   printf("%s done.\n", argv[0]);
  227:   return 0;
  228: }
Syntax (Markdown)