1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25: #include <stdlib.h>
26: #include <string.h>
27: #include <stdio.h>
28: #include <anthy/anthy.h>
29: #include <anthy/conf.h>
30: #include <anthy/xstr.h>
31: #include <anthy/xchar.h>
32: #include "dic_main.h"
33: #include "dic_ent.h"
34:
35:
/*
 * Pseudo entries handed out by anthy_get_ext_seq_ent_from_xstr():
 *   unkseq_ent - ASCII, katakana and (forward lookup only) kanji strings
 *   num_ent    - narrow or wide digit strings
 *   sep_ent    - any other string of length 1
 * Callers identify them by address.
 */
static struct seq_ent unkseq_ent;
static struct seq_ent num_ent;
static struct seq_ent sep_ent;

/* Word type reported for numbers; assigned in anthy_init_ext_ent(). */
static wtype_t wt_num;

/* Wide digit for each digit value; indexed by 0..9. */
static xchar narrow_wide_tab[]= {WIDE_0, WIDE_1, WIDE_2,
WIDE_3, WIDE_4, WIDE_5,
WIDE_6, WIDE_7, WIDE_8, WIDE_9};
/* Kanji numeral for each digit value; indexed by 0..9. */
static int kj_num_tab[]={KJ_0, KJ_1, KJ_2, KJ_3, KJ_4,
KJ_5, KJ_6, KJ_7, KJ_8, KJ_9};
47:
/*
 * Place names collected for one zip code by search_zipcode_dict().
 * Released with free_zipcode_line().
 */
struct zipcode_line {
/* number of valid elements in strs */
int nr;
/* heap array of nr place names; NULL while the list is empty */
xstr **strs;
};
52:
53:
54: static void
55: pushback_place_name(struct zipcode_line *zl, char *pn)
56: {
57: if (pn[0] == '#') {
58: return ;
59: }
60: zl->strs = realloc(zl->strs, sizeof(xstr *) * (zl->nr + 1));
61: zl->strs[zl->nr] = anthy_cstr_to_xstr(pn, ANTHY_EUC_JP_ENCODING);
62: zl->nr++;
63: }
64:
65:
/*
 * Split the place-name part of a zip code dictionary line into tokens and
 * append each one to zl via pushback_place_name().
 *
 * zl: result list to append to.
 * ln: NUL-terminated text following the zip code and its separator.
 *
 * Tokens are separated by a single ' '.  A backslash makes the following
 * byte literal (so "\ " yields a space inside a token).  The text after
 * the last separator is always pushed, even when it is empty.
 *
 * A token longer than the local buffer is truncated instead of being
 * written past the end of the buffer.
 */
static void
parse_zipcode_line(struct zipcode_line *zl, char *ln)
{
  char buf[1000];
  size_t used = 0;

  for (; *ln; ln++) {
    char c = *ln;

    if (c == ' ') {
      /* unescaped separator: finish the current token */
      buf[used] = 0;
      used = 0;
      pushback_place_name(zl, buf);
      continue;
    }

    if (c == '\\') {
      /* take the next byte literally; a trailing backslash yields NUL,
       * which simply ends the token */
      c = ln[1];
      if (c) {
        ln++;
      }
    }

    /* keep one byte free for the terminator */
    if (used < sizeof(buf) - 1) {
      buf[used++] = c;
    }
  }

  buf[used] = 0;
  pushback_place_name(zl, buf);
}
92:
93:
94: static void
95: search_zipcode_dict(struct zipcode_line *zl, xstr* xs)
96: {
97: FILE *fp;
98: char buf[1000];
99: int len;
100: xstr *temp;
101: char *index;
102:
103: zl->nr = 0;
104: zl->strs = NULL;
105: fp = fopen(anthy_conf_get_str("ZIPDICT_EUC"), "r");
106: if (!fp) {
107: return ;
108: }
109:
110:
111: temp = anthy_xstr_wide_num_to_num(xs);
112: index = anthy_xstr_to_cstr(temp, 0);
113: len = strlen(index);
114:
115:
116: while (fgets(buf, 1000, fp)) {
117:
118: if (!strncmp(buf, index, len) && buf[len] == ' ') {
119:
120: buf[strlen(buf) - 1] = 0;
121: parse_zipcode_line(zl, &buf[len + 1]);
122: }
123: }
124: free(temp);
125: free(index);
126: fclose(fp);
127: }
128:
129:
130: static void
131: free_zipcode_line(struct zipcode_line *zl)
132: {
133: int i;
134: for (i = 0; i < zl->nr; i++) {
135: anthy_free_xstr(zl->strs[i]);
136: }
137: free(zl->strs);
138: }
139:
140: static int
141: gen_zipcode(xstr* xs, xstr *dest, int nth)
142: {
143: struct zipcode_line zl;
144:
145:
146: search_zipcode_dict(&zl, xs);
147:
148:
149: if (zl.nr > nth) {
150: dest->len = zl.strs[nth]->len;
151: dest->str = anthy_xstr_dup_str(zl.strs[nth]);
152: free_zipcode_line(&zl);
153: return 0;
154: } else {
155: free_zipcode_line(&zl);
156: return -1;
157: }
158: }
159:
160:
161:
162:
163: static xchar
164: narrow_num_to_wide_num(xchar xc)
165: {
166: if (xc > '9' || xc < '0') {
167: return WIDE_0;
168: }
169: return narrow_wide_tab[(int)(xc - '0')];
170: }
171:
172:
173: static xchar
174: wide_num_to_narrow_num(xchar xc)
175: {
176: int i;
177: for (i = 0; i < 10; i++) {
178: if (xc == narrow_wide_tab[i]) {
179: return i + '0';
180: }
181: }
182: return '0';
183: }
184:
185:
186:
187: static xchar
188: get_kj_num(int n)
189: {
190: if (n > 9 || n < 1) {
191: return KJ_10;
192: }
193: return kj_num_tab[n];
194: }
195:
196:
197:
198:
199: static void
200: compose_num_component(xstr *xs, long long num)
201: {
202: int n[4],i;
203: int a[4] = { 0 , KJ_10, KJ_100, KJ_1000};
204: for (i = 0; i < 4; i++) {
205: n[i] = num-(num/10)*10;
206: num /= 10;
207: }
208:
209: for (i = 3; i > 0; i--) {
210: if (n[i] > 0) {
211: if (n[i] > 1) {
212: anthy_xstrappend(xs, get_kj_num(n[i]));
213: }
214: anthy_xstrappend(xs, a[i]);
215: }
216: }
217:
218: if (n[0]) {
219: anthy_xstrappend(xs, get_kj_num(n[0]));
220: }
221: }
222:
223:
224: static int
225: gen_kanji_num(long long num, xstr *dest)
226: {
227: int i;
228: int n[10];
229: if (num < 1 || num >= 10000000000000000LL) {
230: return -1;
231: }
232:
233: for (i = 0; i < 10; i ++) {
234: n[i] = num-(num/10000)*10000;
235: num = num/10000;
236: }
237:
238: dest->len = 0;
239: dest->str = 0;
240:
241: if (n[3]) {
242: compose_num_component(dest, n[3]);
243: anthy_xstrappend(dest, KJ_1000000000000);
244: }
245:
246: if (n[2]) {
247: compose_num_component(dest, n[2]);
248: anthy_xstrappend(dest, KJ_100000000);
249: }
250:
251: if (n[1]) {
252: compose_num_component(dest, n[1]);
253: anthy_xstrappend(dest, KJ_10000);
254: }
255:
256: compose_num_component(dest, n[0]);
257: return 0;
258: }
259:
260: static int
261: get_nr_zipcode(xstr* xs)
262: {
263: struct zipcode_line zl;
264: int nr = 0;
265: if (xs->len != 3 && xs->len != 7) {
266: return 0;
267: }
268:
269: search_zipcode_dict(&zl, xs);
270:
271: nr = zl.nr;
272: free_zipcode_line(&zl);
273: return nr;
274: }
275:
/*
 * Number of numeric candidates available for the value num:
 *   2 - num is outside 1 .. 10^16-1
 *   3 - num is 1 .. 999
 *   5 - num is 1000 .. 10^16-1
 */
static int
get_nr_num_ents(long long num)
{
  if (num <= 0 || num >= 10000000000000000LL) {
    return 2;
  }
  return (num > 999) ? 5 : 3;
}
293:
294:
295:
296:
297:
298: int
299: anthy_get_nr_dic_ents_of_ext_ent(seq_ent_t se, xstr *xs)
300: {
301: if (se == &unkseq_ent) {
302: return 1;
303: }
304: if (anthy_get_xstr_type(xs) & (XCT_NUM|XCT_WIDENUM)) {
305: long long num = anthy_xstrtoll(xs);
306: return get_nr_num_ents(num) + get_nr_zipcode(xs);
307: }
308: return 0;
309: }
310:
311:
312: static void
313: toggle_wide_narrow(xstr *dest, xstr *src)
314: {
315: int f, i;
316: dest->len = src->len;
317: dest->str = anthy_xstr_dup_str(src);
318: f = anthy_get_xstr_type(src) & XCT_WIDENUM;
319: for (i = 0; i < dest->len; i++) {
320: if (f) {
321: dest->str[i] = wide_num_to_narrow_num(src->str[i]);
322: } else {
323: dest->str[i] = narrow_num_to_wide_num(src->str[i]);
324: }
325: }
326: }
327:
328:
329: static int
330: gen_separated_num(long long num, xstr *dest, int full)
331: {
332: int width = 0, dot_count;
333: long long tmp;
334: int i, pos;
335:
336: if (num < 1000) {
337: return -1;
338: }
339:
340:
341: for (tmp = num; tmp != 0; tmp /= 10) {
342: width ++;
343: }
344:
345: dot_count = (width - 1) / 3;
346:
347: dest->len = dot_count + width;
348: dest->str = malloc(sizeof(xchar)*dest->len);
349:
350:
351: for (i = 0, pos = dest->len - 1; i < width; i++, pos --) {
352: int n = num % 10;
353:
354: if (i > 0 && (i % 3) == 0) {
355: if (full) {
356: dest->str[pos] = WIDE_COMMA;
357: } else {
358: dest->str[pos] = ',';
359: }
360: pos --;
361: }
362: if (full) {
363:
364: dest->str[pos] = narrow_wide_tab[n];
365: } else {
366:
367: dest->str[pos] = 48 + n;
368: }
369: num /= 10;
370: }
371: return 0;
372: }
373:
374:
375:
376:
377: int
378: anthy_get_nth_dic_ent_str_of_ext_ent(seq_ent_t se, xstr *xs,
379: int nth, xstr *dest)
380: {
381: if (nth == 0) {
382:
383: dest->len = xs->len;
384: dest->str = anthy_xstr_dup_str(xs);
385: return 0;
386: }
387: if (se == &unkseq_ent) {
388: switch(nth) {
389: case 1:
390:
391: return 0;
392: }
393: }
394: if (anthy_get_xstr_type(xs) & (XCT_NUM|XCT_WIDENUM)) {
395: long long num = anthy_xstrtoll(xs);
396:
397: switch(nth) {
398: case 1:
399:
400: toggle_wide_narrow(dest, xs);
401: return 0;
402: case 2:
403:
404: if (!gen_kanji_num(num, dest)) {
405: return 0;
406: }
407:
408: case 3:
409:
410: if (!gen_separated_num(num, dest, 0)) {
411: return 0;
412: }
413:
414: case 4:
415:
416: if (!gen_separated_num(num, dest, 1)) {
417: return 0;
418: }
419:
420: default:
421:
422: if (nth >= 5) {
423: if (xs->len == 3 || xs->len == 7) {
424: if (!gen_zipcode(xs, dest, nth-5)) {
425: return 0;
426: }
427: }
428: }
429: break;
430: }
431: return -1;
432: }
433: return 0;
434: }
435:
436: int
437: anthy_get_ext_seq_ent_indep(struct seq_ent *se)
438: {
439: if (se == &num_ent || se == &unkseq_ent) {
440: return 1;
441: }
442: return 0;
443: }
444:
445:
446: int
447: anthy_get_ext_seq_ent_ct(struct seq_ent *se, int pos, int ct)
448: {
449: if (anthy_get_ext_seq_ent_pos(se, pos) && ct == CT_NONE) {
450:
451:
452: return 10;
453: }
454: return 0;
455: }
456:
457:
458: int
459: anthy_get_ext_seq_ent_pos(struct seq_ent *se, int pos)
460: {
461:
462: if (se == &num_ent && pos == POS_NOUN) {
463: return 10;
464: }
465: if ((se == &unkseq_ent) && pos == POS_NOUN) {
466: return 10;
467: }
468: return 0;
469: }
470:
471:
472:
473:
474: seq_ent_t
475: anthy_get_ext_seq_ent_from_xstr(xstr *x, int is_reverse)
476: {
477: int t = anthy_get_xstr_type(x);
478:
479:
480: if (t & (XCT_NUM | XCT_WIDENUM)) {
481: return &num_ent;
482: }
483:
484: if (t & XCT_ASCII) {
485: return &unkseq_ent;
486: }
487: if (t & XCT_KATA) {
488: return &unkseq_ent;
489: }
490: if (!is_reverse) {
491:
492: if (t & XCT_KANJI) {
493: return &unkseq_ent;
494: }
495: }
496: if (x->len == 1) {
497:
498: return &sep_ent;
499: }
500: return 0;
501: }
502:
503: int
504: anthy_get_nth_dic_ent_wtype_of_ext_ent(xstr *xs, int nth,
505: wtype_t *wt)
506: {
507: int type;
508: (void)nth;
509: type = anthy_get_xstr_type(xs);
510: if (type & (XCT_NUM | XCT_WIDENUM)) {
511: *wt = wt_num;
512: return 0;
513: }
514: if (type & XCT_KATA) {
515: *wt = anthy_get_wtype(POS_NOUN, COS_NONE, SCOS_NONE, CC_NONE,
516: CT_NONE, WF_INDEP);
517: return 0;
518: }
519: return -1;
520: }
521:
/*
 * Frequency of a generated candidate.  Both arguments are ignored; every
 * candidate gets the same fixed value of 100.
 */
int
anthy_get_nth_dic_ent_freq_of_ext_ent(struct seq_ent *se, int nth)
{
  enum { EXT_ENT_FREQ = 100 };

  (void)nth;
  (void)se;
  return EXT_ENT_FREQ;
}
529:
530: int
531: anthy_get_ext_seq_ent_wtype(struct seq_ent *se, wtype_t w)
532: {
533: if (se == &num_ent) {
534: if (anthy_wtype_include(w, wt_num)) {
535:
536: return 10;
537: }
538: return 0;
539: }
540: if (anthy_wtype_get_pos(w) == POS_NOUN &&
541: anthy_wtype_get_cos(w) == COS_NONE &&
542: anthy_wtype_get_scos(w) == SCOS_NONE) {
543:
544: return 10;
545: }
546: return 0;
547: }
548:
549: void
550: anthy_init_ext_ent(void)
551: {
552:
553: unkseq_ent.seq_type = 0;
554: unkseq_ent.nr_dic_ents = 0;
555: num_ent.seq_type = 0;
556: num_ent.nr_dic_ents = 0;
557: sep_ent.seq_type = 0;
558: sep_ent.nr_dic_ents = 0;
559:
560: wt_num = anthy_init_wtype_by_name("数詞");
561: }