1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33: #include <stdio.h>
34: #include <stdlib.h>
35: #include <string.h>
36:
37: #include <anthy/anthy.h>
38: #include <anthy/dicutil.h>
39:
40: #include <anthy/xstr.h>
41: #include "config.h"
42:
/* Values stored in `command` to record which operation was requested. */
#define UNSPEC 0
#define DUMP_DIC 1
#define LOAD_DIC 2
#define APPEND_DIC 3

/*
 * Data files used by this tool.  Each is tried in the current directory
 * first and then under the directory reported by the library.
 */
#define TYPETAB "typetab"
#define USAGE_TEXT "dic-tool-usage.txt"

/* Option summary printed by print_usage(). */
#define USAGE \
  "Anthy-dic-util [options]\n" \
  " --help: Show this usage text\n" \
  " --version: Show version\n" \
  " --dump: Dump dictionary\n" \
  " --load: Load dictionary\n" \
  " --append: Append dictionary\n" \
  " --utf8: Use utf8 encoding\n" \
  " --personality=NAME: use NAME as a name of personality\n"
60:
61:
62: static int command = UNSPEC;
63: static int encoding = ANTHY_EUC_JP_ENCODING;
64: static FILE *fp_in;
65: static char *fn;
66: static const char *personality = "";
67:
68:
/* One "name = value" attribute; attributes are chained in a singly linked list. */
struct var {
  struct var *next;   /* following attribute, NULL at the end of the list */
  char *var_name;     /* heap-allocated attribute name */
  char *val;          /* heap-allocated attribute value */
};

/* One word type from the type table together with its attributes. */
struct trans_tab {
  struct trans_tab *next;  /* following entry, NULL at the end of the list */
  char *type_name;         /* heap-allocated type name, stored with a leading '#' */
  struct var var_list;     /* dummy list head; attributes start at var_list.next */
};

/* Dummy head of the list of all loaded entries; only its `next` field is used. */
struct trans_tab trans_tab_list;
81:
82: static void
83: print_usage(void)
84: {
85: printf(USAGE);
86: exit(0);
87: }
88:
89: static FILE *
90: open_typetab(void)
91: {
92: FILE *fp;
93: char *fn;
94: fp = fopen(TYPETAB, "r");
95: if (fp) {
96: return fp;
97: }
98: fn = strdup(anthy_dic_util_get_anthydir());
99: fn = realloc(fn, strlen(fn) + strlen(TYPETAB) + 4);
100: strcat(fn, "/");
101: strcat(fn, TYPETAB);
102: fp = fopen(fn, "r");
103: return fp;
104: }
105:
106: static FILE *
107: open_usage_file(void)
108: {
109: FILE *fp;
110:
111: fp = fopen(USAGE_TEXT, "r");
112: if (!fp) {
113:
114: char *fn;
115: fn = strdup(anthy_dic_util_get_anthydir());
116: fn = realloc(fn, strlen(fn) + strlen(USAGE_TEXT) + 10);
117: strcat(fn, "/" USAGE_TEXT);
118: fp = fopen(fn, "r");
119: }
120: return fp;
121: }
122:
123: static void
124: print_usage_text(void)
125: {
126: char buf[256];
127: FILE *fp = open_usage_file();
128: if (!fp) {
129: printf("# Anthy-dic-tool\n#\n");
130: return ;
131: }
132: fprintf(stdout, "#" PACKAGE " " VERSION "\n");
133: if (encoding == ANTHY_UTF8_ENCODING) {
134: } else {
135: }
136:
137: while (fgets(buf, 256, fp)) {
138: if (encoding == ANTHY_UTF8_ENCODING) {
139: char *s;
140: s = anthy_conv_euc_to_utf8(buf);
141: printf("%s", s);
142: free(s);
143: } else {
144: printf("%s", buf);
145: }
146: }
147: fclose(fp);
148: }
149:
/*
 * Read the next non-comment line from fp into buf.
 *
 * Lines whose first character is '#' are skipped.  The trailing line
 * terminator is removed: a final '\n' is stripped, and then a final '\r'
 * (so both "\n" and "\r\n" endings, as well as a bare trailing '\r',
 * leave no terminator in buf).
 *
 * buf: destination buffer.
 * len: size of buf in bytes; a longer input line is returned in pieces,
 *      as fgets() delivers it.
 * fp:  stream to read from.
 *
 * Returns buf on success, or NULL on end of file / read error.
 */
static char *
read_line(char *buf, int len, FILE *fp)
{
  while (fgets(buf, len, fp)) {
    size_t n;

    if (buf[0] == '#') {
      continue;
    }

    n = strlen(buf);
    if (n > 0 && buf[n - 1] == '\n') {
      buf[--n] = '\0';
    }
    /* Checked after the '\n' is gone so that the '\r' of "\r\n" is now last. */
    if (n > 0 && buf[n - 1] == '\r') {
      buf[--n] = '\0';
    }
    return buf;
  }
  return NULL;
}
169:
170: static int
171: read_typetab_var(struct var *head, FILE *fp, int table)
172: {
173: char buf[256];
174: char var[256], eq[256], val[256];
175: struct var *v;
176: if (!read_line(buf, 256, fp)) {
177: return -1;
178: }
179: if (sscanf(buf, "%s %s %s", var, eq, val) != 3) {
180: return -1;
181: }
182:
183: v = malloc(sizeof(struct var));
184: if (encoding == ANTHY_UTF8_ENCODING && table) {
185:
186: v->var_name = anthy_conv_euc_to_utf8(var);
187: v->val = anthy_conv_euc_to_utf8(val);
188: } else {
189:
190: v->var_name = strdup(var);
191: v->val = strdup(val);
192: }
193:
194:
195: v->next = head->next;
196: head->next = v;
197:
198: return 0;
199: }
200:
201: static int
202: read_typetab_entry(FILE *fp)
203: {
204: char buf[256], type_name[257];
205: char *res;
206: struct trans_tab *t;
207:
208: do {
209: res = read_line(buf, 256, fp);
210: if (!res) {
211: return -1;
212: }
213: } while (res[0] == '#' || res[0] == 0);
214: t = malloc(sizeof(struct trans_tab));
215: sprintf(type_name, "#%s", buf);
216: t->type_name = strdup(type_name);
217: t->var_list.next = 0;
218:
219: while(!read_typetab_var(&t->var_list, fp, 1));
220:
221: t->next = trans_tab_list.next;
222: trans_tab_list.next = t;
223: return 0;
224: }
225:
/*
 * Load the whole type table into trans_tab_list.
 *
 * Prints a message and exits with status 1 if the table cannot be opened.
 * The stream is closed once every entry has been read.
 */
static void
read_typetab(void)
{
  FILE *fp = open_typetab();
  if (!fp) {
    printf("Failed to open type table.\n");
    exit(1);
  }
  while (!read_typetab_entry(fp)) {
    /* read entries until read_typetab_entry() reports the end */
  }
  fclose(fp);
}
236:
237: static struct trans_tab *
238: find_trans_tab_by_name(char *name)
239: {
240: struct trans_tab *t;
241: for (t = trans_tab_list.next; t; t = t->next) {
242: if (!strcmp(t->type_name, name)) {
243: return t;
244: }
245: }
246: return NULL;
247: }
248:
249: static void
250: print_word_type(struct trans_tab *t)
251: {
252: struct var *v;
253: for (v = t->var_list.next; v; v = v->next) {
254: printf("%s\t=\t%s\n", v->var_name, v->val);
255: }
256: }
257:
/*
 * Dump the private dictionary to stdout.
 *
 * The usage header is printed first.  Then, for every entry the library
 * iterates over, a "<index> <freq> <word>" line is printed followed by the
 * attributes of its word type and a blank line.  Entries whose word type
 * is not in the type table produce a "# Failed ..." comment line instead;
 * entries for which any of the getters returns a false value are skipped.
 */
static void
dump_dic(void)
{
  char idx[100], wt[100], w[100];

  print_usage_text();

  if (anthy_priv_dic_select_first_entry() == -1) {
    printf("# Failed to read private dictionary\n"
           "# There are no words or error occured?\n"
           "#\n");
    return;
  }

  do {
    struct trans_tab *type;
    int freq;

    if (!anthy_priv_dic_get_index(idx, 100) ||
        !anthy_priv_dic_get_wtype(wt, 100) ||
        !anthy_priv_dic_get_word(w, 100)) {
      /* `continue` in a do-while jumps to the loop condition below. */
      continue;
    }

    freq = anthy_priv_dic_get_freq();
    type = find_trans_tab_by_name(wt);
    if (type == NULL) {
      printf("# Failed to determine word type of %s(%s).\n", w, wt);
      continue;
    }

    printf("%s %d %s\n", idx, freq, w);
    print_word_type(type);
    printf("\n");
  } while (anthy_priv_dic_select_next_entry() == 0);
}
287:
288: static void
289: open_input_file(void)
290: {
291: if (!fn) {
292: fp_in = stdin;
293: } else {
294: fp_in = fopen(fn, "r");
295: if (!fp_in) {
296: exit(1);
297: }
298: }
299: }
300:
301:
302: static int
303: match_var(struct var *v, struct var *s)
304: {
305: struct var *i;
306: for (i = s->next; i; i = i->next) {
307: if (!strcmp(v->var_name, i->var_name) &&
308: !strcmp(v->val, i->val)) {
309: return 1;
310: }
311: }
312: return 0;
313: }
314:
315:
316: static int
317: var_list_subset_p(struct var *v1, struct var *v2)
318: {
319: struct var *v;
320: for (v = v1->next; v; v = v->next) {
321: if (!match_var(v, v2)) {
322: return 0;
323: }
324: }
325: return 1;
326: }
327:
328: static char *
329: find_wt(void)
330: {
331: struct var v;
332: struct trans_tab *t;
333: v.next = 0;
334: while(!read_typetab_var(&v, fp_in, 0));
335: for (t = trans_tab_list.next; t; t = t->next) {
336: if (var_list_subset_p(&t->var_list, &v) &&
337: var_list_subset_p(&v, &t->var_list)) {
338: return t->type_name;
339: }
340: }
341: return NULL;
342: }
343:
344: static int
345: find_head(char *yomi, char *freq, char *w)
346: {
347: char buf[256];
348: do {
349: if (!read_line(buf, 256, fp_in)) {
350: return -1;
351: }
352: } while (sscanf(buf, "%s %s %[^\n]",yomi, freq, w) != 3);
353: return 0;
354: }
355:
/*
 * Read entries from fp_in and register each one in the private dictionary.
 *
 * For every header found by find_head(), the word type is resolved with
 * find_wt() and the entry is added with anthy_priv_dic_add_entry(); the
 * frequency token is converted with atoi().  One status line per entry is
 * printed to stdout.
 */
static void
load_dic(void)
{
  char yomi[256], freq[256], w[256];

  while (find_head(yomi, freq, w) == 0) {
    char *wt = find_wt();

    if (wt == NULL) {
      printf("Failed to find the type of %s.\n", yomi);
      continue;
    }

    if (anthy_priv_dic_add_entry(yomi, w, wt, atoi(freq)) == -1) {
      printf("Failed to register %s\n", yomi);
    } else {
      printf("Word %s is registered as %s\n", yomi, wt);
    }
  }
}
375:
376: static void
377: print_version(void)
378: {
379: printf("Anthy-dic-util "VERSION".\n");
380: exit(0);
381: }
382:
383: static void
384: parse_args(int argc, char **argv)
385: {
386: int i;
387: for (i = 1 ; i < argc ; i++) {
388: if (!strncmp(argv[i], "--", 2)) {
389: char *opt = &argv[i][2];
390: if (!strcmp(opt, "help")) {
391: print_usage();
392: } else if (!strcmp(opt, "version")){
393: print_version();
394: } else if (!strcmp(opt, "dump")) {
395: command = DUMP_DIC;
396: } else if (!strcmp(opt,"append") ){
397: command = APPEND_DIC;
398: } else if (!strncmp(opt, "personality=", 12)) {
399: personality = &opt[12];
400: } else if (!strcmp(opt, "utf8")) {
401: encoding = ANTHY_UTF8_ENCODING;
402: } else if (!strcmp(opt, "eucjp")) {
403: encoding = ANTHY_EUC_JP_ENCODING;
404: } else if (!strcmp(opt, "load")) {
405: command = LOAD_DIC;
406: }
407: }else{
408: fn = argv[i];
409: }
410: }
411: }
412:
413: static void
414: init_lib(void)
415: {
416: anthy_dic_util_init();
417: anthy_dic_util_set_encoding(encoding);
418: read_typetab();
419: }
420:
421: int
422: main(int argc,char **argv)
423: {
424: fp_in = stdin;
425: parse_args(argc, argv);
426:
427: switch (command) {
428: case DUMP_DIC:
429: init_lib();
430: dump_dic();
431: break;
432: case LOAD_DIC:
433: init_lib();
434: anthy_priv_dic_delete();
435: open_input_file();
436: load_dic();
437: break;
438: case APPEND_DIC:
439: init_lib();
440: open_input_file();
441: load_dic();
442: break;
443: case UNSPEC:
444: default:
445: print_usage();
446: }
447: return 0;
448: }