1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37: #include <stdlib.h>
38: #include <stdio.h>
39: #include <string.h>
40:
41: #include <anthy/anthy.h>
42: #include <anthy/conf.h>
43: #include <anthy/dic.h>
44: #include <anthy/texttrie.h>
45: #include <anthy/textdict.h>
46: #include <anthy/dicutil.h>
47:
48: #include "dic_main.h"
49: #include "dic_personality.h"
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
/* Longest reading (yomi), in bytes, accepted by add_word_to_textdict().
 * The iterator's key buffer is sized MAX_KEY_LEN + 32. */
#define MAX_KEY_LEN 96

/* Set to 1 by a successful anthy_dic_util_init() and cleared by
 * anthy_dic_util_quit(). */
static int gIsInit;
/* Encoding chosen through anthy_dic_util_set_encoding();
 * anthy_dic_util_init() sets it to ANTHY_EUC_JP_ENCODING. */
static int dic_util_encoding;

/* Private dictionary stores; both objects are defined outside this file. */
extern struct text_trie *anthy_private_tt_dic;
extern struct textdict *anthy_private_text_dic;
68:
/* Cursor state shared by the anthy_priv_dic_select_*() and
 * anthy_priv_dic_get_*() functions. */
static struct iterate_contex {
  /* non-zero while the cursor points into the text-trie; cleared when the
   * cursor moves on to the text dictionary */
  int in_tt;
  /* current text-trie key; the getters treat the first two bytes as the
   * encoding prefix */
  char key_buf[MAX_KEY_LEN+32];
  /* offset passed to the next anthy_textdict_scan() call */
  int dicfile_offset;
  /* heap copies of the key and line of the current text dictionary entry;
   * replaced and freed only through set_current_line() */
  char *current_index;
  char *current_line;
} word_iterator;
79:
/* Argument block handed to find_cb() and order_cb() through
 * anthy_textdict_scan(). */
struct scan_context {
  /* reading, word and word-type name of the entry being looked for */
  const char *yomi;
  const char *word;
  const char *wt_name;
  /* overwritten with next_offset each time a callback passes over a line */
  int offset;
  /* set to 1 by the callback that stops the scan */
  int found_word;
};
87:
88: static void
89: set_current_line(const char *index, const char *line)
90: {
91: if (word_iterator.current_line) {
92: free(word_iterator.current_line);
93: word_iterator.current_line = NULL;
94: }
95: if (line) {
96: word_iterator.current_line = strdup(line);
97: }
98: if (word_iterator.current_index) {
99: free(word_iterator.current_index);
100: word_iterator.current_index = NULL;
101: }
102: if (index) {
103: word_iterator.current_index = strdup(index);
104: }
105: }
106:
107:
108: void
109: anthy_dic_util_init(void)
110: {
111: if (gIsInit) {
112: return ;
113: }
114: if (anthy_init_dic() == -1) {
115: return ;
116: }
117: anthy_dic_set_personality("default");
118: gIsInit = 1;
119: dic_util_encoding = ANTHY_EUC_JP_ENCODING;
120:
121: word_iterator.key_buf[0] = 0;
122: word_iterator.in_tt = 1;
123: }
124:
125:
126: void
127: anthy_dic_util_quit(void)
128: {
129: if (gIsInit) {
130: anthy_quit_dic();
131: }
132: set_current_line(NULL, NULL);
133: gIsInit = 0;
134: }
135:
136:
137: int
138: anthy_dic_util_set_encoding(int enc)
139: {
140: if (enc == ANTHY_UTF8_ENCODING ||
141: enc == ANTHY_EUC_JP_ENCODING) {
142: dic_util_encoding = enc;
143: }
144: return dic_util_encoding;
145: }
146:
/*
 * Public wrapper: forwards the personality identifier unchanged to
 * anthy_dic_set_personality().
 */
void
anthy_dic_util_set_personality(const char *id)
{
  anthy_dic_set_personality(id);
}
152:
153: static char *
154: find_next_key(const char *prefix)
155: {
156: char *v;
157: v = anthy_trie_find_next_key(anthy_private_tt_dic,
158: word_iterator.key_buf, MAX_KEY_LEN+32);
159:
160: if (v && v[0] == prefix[0] && v[1] == prefix[1]) {
161:
162: return v;
163: }
164:
165: sprintf(word_iterator.key_buf, prefix);
166: return NULL;
167: }
168:
169: static void
170: delete_prefix(const char *prefix)
171: {
172: sprintf(word_iterator.key_buf, prefix);
173: anthy_priv_dic_lock();
174:
175:
176: while (find_next_key(prefix)) {
177: anthy_trie_delete(anthy_private_tt_dic, word_iterator.key_buf);
178: sprintf(word_iterator.key_buf, prefix);
179: }
180: anthy_priv_dic_unlock();
181: }
182:
183: static const char *
184: encoding_prefix(int encoding)
185: {
186: if (encoding == ANTHY_UTF8_ENCODING) {
187: return " p";
188: }
189:
190: return " ";
191: }
192:
193:
194: void
195: anthy_priv_dic_delete(void)
196: {
197: delete_prefix(encoding_prefix(ANTHY_EUC_JP_ENCODING));
198:
199: while (!anthy_textdict_delete_line(anthy_private_text_dic, 0)) {
200:
201: }
202: }
203:
204: static int
205: scan_one_word_cb(void *p, int next_offset, const char *key, const char *n)
206: {
207: (void)p;
208: set_current_line(key, n);
209: word_iterator.dicfile_offset = next_offset;
210: return -1;
211: }
212:
213: static int
214: select_first_entry_in_textdict(void)
215: {
216: word_iterator.dicfile_offset = 0;
217: set_current_line(NULL, NULL);
218: anthy_textdict_scan(anthy_private_text_dic,
219: word_iterator.dicfile_offset, NULL,
220: scan_one_word_cb);
221: if (word_iterator.current_line) {
222: word_iterator.in_tt = 0;
223: return 0;
224: }
225:
226: return ANTHY_DIC_UTIL_ERROR;
227: }
228:
229:
230: int
231: anthy_priv_dic_select_first_entry(void)
232: {
233: if (dic_util_encoding == ANTHY_UTF8_ENCODING) {
234: return select_first_entry_in_textdict();
235: }
236: if (anthy_private_tt_dic) {
237: sprintf(word_iterator.key_buf, encoding_prefix(dic_util_encoding));
238:
239: if (find_next_key(encoding_prefix(dic_util_encoding))) {
240: word_iterator.in_tt = 1;
241: return 0;
242: }
243: }
244:
245: return select_first_entry_in_textdict();
246: }
247:
248:
249: int
250: anthy_priv_dic_select_next_entry(void)
251: {
252: if (!word_iterator.in_tt) {
253: set_current_line(NULL, NULL);
254: anthy_textdict_scan(anthy_private_text_dic, word_iterator.dicfile_offset,
255: NULL,
256: scan_one_word_cb);
257: if (word_iterator.current_line) {
258: return 0;
259: }
260: return ANTHY_DIC_UTIL_ERROR;
261: }
262: if (find_next_key(encoding_prefix(dic_util_encoding))) {
263: return 0;
264: }
265:
266: return select_first_entry_in_textdict();
267: }
268:
269:
/*
 * Stub: the index argument is ignored, the iterator is left untouched
 * and 0 is always returned.
 */
int
anthy_priv_dic_select_entry(const char *index)
{
  (void)index;
  return 0;
}
276:
277:
278: char *
279: anthy_priv_dic_get_index(char *buf, int len)
280: {
281: int i;
282: char *src_buf;
283: if (word_iterator.in_tt) {
284: src_buf = &word_iterator.key_buf[2];
285: } else {
286: src_buf = word_iterator.current_index;
287: }
288: if (!word_iterator.in_tt && dic_util_encoding == ANTHY_EUC_JP_ENCODING) {
289:
290: src_buf = anthy_conv_utf8_to_euc(src_buf);
291: } else {
292: src_buf = strdup(src_buf);
293: }
294:
295: for (i = 0; src_buf[i] && src_buf[i] != ' '; i++) {
296: if (i >= len - 1) {
297: free(src_buf);
298: return NULL;
299: }
300: buf[i] = src_buf[i];
301: }
302: buf[i] = 0;
303: free(src_buf);
304: return buf;
305: }
306:
307:
308: int
309: anthy_priv_dic_get_freq(void)
310: {
311: struct word_line res;
312: char *v;
313: if (word_iterator.in_tt) {
314: v = anthy_trie_find(anthy_private_tt_dic, word_iterator.key_buf);
315: anthy_parse_word_line(v, &res);
316: free(v);
317: } else {
318: anthy_parse_word_line(word_iterator.current_line, &res);
319: }
320: return res.freq;
321: }
322:
323:
324: char *
325: anthy_priv_dic_get_wtype(char *buf, int len)
326: {
327: struct word_line res;
328: char *v;
329: if (word_iterator.in_tt) {
330: v = anthy_trie_find(anthy_private_tt_dic, word_iterator.key_buf);
331: anthy_parse_word_line(v, &res);
332: free(v);
333: } else {
334: anthy_parse_word_line(word_iterator.current_line, &res);
335: }
336: if (len - 1 < (int)strlen(res.wt)) {
337: return NULL;
338: }
339: sprintf(buf, "%s", res.wt);
340: return buf;
341: }
342:
343:
344: char *
345: anthy_priv_dic_get_word(char *buf, int len)
346: {
347: char *v;
348: char *s;
349: if (word_iterator.in_tt) {
350: v = anthy_trie_find(anthy_private_tt_dic, word_iterator.key_buf);
351: } else {
352: v = word_iterator.current_line;
353: }
354: if (!v) {
355: return NULL;
356: }
357:
358: s = strchr(v, ' ');
359: s++;
360: if (!word_iterator.in_tt && dic_util_encoding == ANTHY_EUC_JP_ENCODING) {
361: s = anthy_conv_utf8_to_euc(s);
362: snprintf(buf, len, "%s", s);
363: free(s);
364: } else {
365: snprintf(buf, len, "%s", s);
366: }
367: if (word_iterator.in_tt) {
368: free(v);
369: }
370: return buf;
371: }
372:
373: static int
374: find_cb(void *p, int next_offset, const char *key, const char *n)
375: {
376: struct scan_context *sc = p;
377: struct word_line res;
378: if (strcmp(key, sc->yomi)) {
379: sc->offset = next_offset;
380: return 0;
381: }
382: anthy_parse_word_line(n, &res);
383: if (!strcmp(res.wt, sc->wt_name) &&
384: !strcmp(res.word, sc->word)) {
385: sc->found_word = 1;
386: return -1;
387: }
388: sc->offset = next_offset;
389: return 0;
390: }
391:
392: static int
393: order_cb(void *p, int next_offset, const char *key, const char *n)
394: {
395: struct scan_context *sc = p;
396: (void)n;
397: if (strcmp(key, sc->yomi) >= 0) {
398: sc->found_word = 1;
399: return -1;
400: }
401: sc->offset = next_offset;
402: return 0;
403: }
404:
405:
/*
 * Format one entry as "<yomi> <wt_name>*<freq> <word>\n" and insert it
 * into td at the given offset.
 *
 * Returns -1 when the line buffer cannot be allocated, otherwise the
 * result of anthy_textdict_insert_line().
 */
static int
do_add_word_to_textdict(struct textdict *td, int offset,
                        const char *yomi, const char *word,
                        const char *wt_name, int freq)
{
  /* 20 spare bytes cover the separators, '*', the decimal frequency,
   * the newline and the terminating NUL. */
  size_t capacity = strlen(yomi) + strlen(word) + strlen(wt_name) + 20;
  char *line = malloc(capacity);
  int result;

  if (line == NULL) {
    return -1;
  }
  sprintf(line, "%s %s*%d %s\n", yomi, wt_name, freq, word);
  result = anthy_textdict_insert_line(td, offset, line);
  free(line);
  return result;
}
421:
422: static int
423: dup_word_check(const char *v, const char *word, const char *wt)
424: {
425: struct word_line res;
426:
427: if (anthy_parse_word_line(v, &res)) {
428: return 0;
429: }
430:
431:
432: if (!strcmp(res.wt, wt) &&
433: !strcmp(res.word, word)) {
434: return 1;
435: }
436: return 0;
437: }
438:
439: static int
440: find_same_word(char *idx_buf, const char *yomi,
441: const char *word, const char *wt_name, int yomi_len)
442: {
443: int found = 0;
444: sprintf(idx_buf, "%s%s ",
445: encoding_prefix(dic_util_encoding),
446: yomi);
447: anthy_trie_find_next_key(anthy_private_tt_dic,
448: idx_buf, yomi_len + 12);
449:
450:
451: do {
452: char *v;
453: if (strncmp(&idx_buf[2], yomi, yomi_len) ||
454: idx_buf[yomi_len+2] != ' ') {
455:
456: break;
457: }
458:
459: v = anthy_trie_find(anthy_private_tt_dic, idx_buf);
460: if (v) {
461: found = dup_word_check(v, word, wt_name);
462: free(v);
463: if (found) {
464: break;
465: }
466: }
467: } while (anthy_trie_find_next_key(anthy_private_tt_dic,
468: idx_buf, yomi_len + 12));
469:
470: return found;
471: }
472:
473: static int
474: add_word_to_textdict(const char *yomi, const char *word,
475: const char *wt_name, int freq)
476: {
477: struct scan_context sc;
478: int rv;
479: int yomi_len = strlen(yomi);
480:
481: if (yomi_len > MAX_KEY_LEN || yomi_len == 0) {
482: return ANTHY_DIC_UTIL_ERROR;
483: }
484:
485: if (wt_name[0] != '#') {
486: return ANTHY_DIC_UTIL_ERROR;
487: }
488:
489:
490: if (anthy_private_tt_dic) {
491: char *idx_buf = malloc(yomi_len + 12);
492: if (find_same_word(idx_buf, yomi, word, wt_name, yomi_len)) {
493: anthy_trie_delete(anthy_private_tt_dic, idx_buf);
494: }
495: free(idx_buf);
496: }
497:
498:
499: sc.yomi = yomi;
500: sc.word = word;
501: sc.wt_name = wt_name;
502:
503: sc.offset = 0;
504: sc.found_word = 0;
505: anthy_textdict_scan(anthy_private_text_dic, 0, &sc,
506: find_cb);
507: if (sc.found_word == 1) {
508: anthy_textdict_delete_line(anthy_private_text_dic, sc.offset);
509: }
510: if (freq == 0) {
511: return ANTHY_DIC_UTIL_OK;
512: }
513:
514: sc.offset = 0;
515: sc.found_word = 0;
516: anthy_textdict_scan(anthy_private_text_dic, 0, &sc,
517: order_cb);
518:
519: rv = do_add_word_to_textdict(anthy_private_text_dic, sc.offset,
520: yomi, word, wt_name, freq);
521: if (!rv) {
522: return ANTHY_DIC_UTIL_OK;
523: }
524: return ANTHY_DIC_UTIL_ERROR;
525: }
526:
527:
528:
529:
530: int
531: anthy_priv_dic_add_entry(const char *yomi, const char *word,
532: const char *wt_name, int freq)
533: {
534: if (dic_util_encoding == ANTHY_UTF8_ENCODING) {
535: return add_word_to_textdict(yomi, word, wt_name, freq);
536: } else {
537: int rv;
538: char *yomi_utf8 = anthy_conv_euc_to_utf8(yomi);
539: char *word_utf8 = anthy_conv_euc_to_utf8(word);
540: rv = add_word_to_textdict(yomi_utf8, word_utf8, wt_name, freq);
541: free(yomi_utf8);
542: free(word_utf8);
543: return rv;
544: }
545: }
546:
/*
 * Return the value of the "ANTHYDIR" configuration entry, exactly as
 * anthy_conf_get_str() reports it.
 */
const char *
anthy_dic_util_get_anthydir(void)
{
  return anthy_conf_get_str("ANTHYDIR");
}
552:
553:
/*
 * Scan a word list (one entry per line) for entries that are a
 * case-insensitive prefix of word.
 *
 * fp:   stream positioned at the start of the list.
 * word: word to look up.
 *
 * Returns a malloc()ed copy of the last matching entry, or NULL when
 * nothing matches (or the copy could not be allocated).  The caller
 * frees the result.
 *
 * Blank lines are ignored, since an empty entry would match every word.
 * Lines that do not fit into the 31-character line buffer are skipped
 * as a whole, so their tail is never treated as a separate entry.
 */
static char *
do_search(FILE *fp, const char *word)
{
  char buf[32];
  char *res = NULL;
  size_t word_len = strlen(word);

  while (fgets(buf, sizeof(buf), fp)) {
    size_t len = strcspn(buf, "\n");

    if (buf[len] == '\n') {
      /* Strip the newline only when one was actually read. */
      buf[len] = '\0';
    } else if (len == sizeof(buf) - 1) {
      /* Buffer filled without a newline: the line either ends right
       * here or is too long to be handled. */
      int c = fgetc(fp);
      if (c != EOF && c != '\n') {
        while ((c = fgetc(fp)) != EOF && c != '\n') {
          /* discard the rest of the overlong line */
        }
        continue;
      }
    }

    if (len == 0 || len > word_len) {
      continue;
    }
    if (!strncasecmp(buf, word, len)) {
      /* Keep only the most recent match; the length is already known. */
      free(res);
      res = malloc(len + 1);
      if (res) {
        memcpy(res, buf, len + 1);
      }
    }
  }
  return res;
}
576:
577:
578: char *
579: anthy_dic_search_words_file(const char *word)
580: {
581: FILE *fp;
582: char *res;
583: const char *words_dict_fn = anthy_conf_get_str("WORDS_FILE");
584: if (!words_dict_fn) {
585: return NULL;
586: }
587: fp = fopen(words_dict_fn, "r");
588: if (!fp) {
589: return NULL;
590: }
591: res = do_search(fp, word);
592: fcl