1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26: #include <stdio.h>
27: #include <stdlib.h>
28: #include <string.h>
29: #include <anthy/anthy.h>
30: #include <anthy/word_dic.h>
31: #include "mkdic.h"
32:
33: extern FILE *page_out, *page_index_out;
34: extern FILE *yomi_entry_index_out, *yomi_entry_out;
35:
36: static int
37: write_word(struct word_entry *we, int encoding)
38: {
39: int count;
40: if (encoding == ANTHY_UTF8_ENCODING) {
41: count = fprintf(yomi_entry_out, "%s", we->word_utf8);
42: } else {
43: char *s = anthy_conv_utf8_to_euc(we->word_utf8);
44: count = fprintf(yomi_entry_out, "%s", s);
45: free(s);
46: }
47: return count;
48: }
49:
50: static int
51: write_freq(FILE *fp, struct word_entry *we)
52: {
53: int count = 0;
54: int freq = we->freq / 100;
55: if (freq != 1) {
56: count += fprintf(fp, "*%d", freq);
57: }
58: return count;
59: }
60:
61: static int
62: compare_word_entry(struct word_entry *prev_we,
63: struct word_entry *we)
64: {
65: if (strcmp(prev_we->wt_name, we->wt_name) ||
66: (prev_we->freq / 100) != (we->freq / 100) ||
67: prev_we->feature != we->feature) {
68: return 1;
69: }
70: return 0;
71: }
72:
73:
74:
75:
76: static int
77: output_word_entry_for_a_yomi(struct yomi_entry *ye, int encoding)
78: {
79: int i;
80: int count = 0;
81:
82: if (!ye) {
83: return 0;
84: }
85: if (encoding == ANTHY_UTF8_ENCODING) {
86: count ++;
87: fputc('u', yomi_entry_out);
88: }
89:
90: for (i = 0; i < ye->nr_entries; i++) {
91: struct word_entry *we = &ye->entries[i];
92: struct word_entry *prev_we = NULL;
93: if (i != 0) {
94: prev_we = &ye->entries[i-1];
95: }
96:
97: if (!we->raw_freq) {
98: continue;
99: }
100: if (i > 0) {
101:
102: count += fprintf(yomi_entry_out, " ");
103: }
104:
105: if (i == 0 ||
106: compare_word_entry(prev_we, we)) {
107: count += fprintf(yomi_entry_out, "%s", we->wt_name);
108: if (we->feature != 0) {
109: count += fprintf(yomi_entry_out, ",");
110: }
111: count += write_freq(yomi_entry_out, we);
112: count += fprintf(yomi_entry_out, " ");
113: }
114:
115: we->offset = count + ye->offset;
116:
117: count += write_word(we, encoding);
118: }
119:
120: fputc(0, yomi_entry_out);
121: return count + 1;
122: }
123:
124:
125: static int
126: common_len(xstr *s1, xstr *s2)
127: {
128: int m,i;
129: if (!s1 || !s2) {
130: return 0;
131: }
132: if (s1->len < s2->len) {
133: m = s1->len;
134: }else{
135: m = s2->len;
136: }
137: for (i = 0; i < m; i++) {
138: if (s1->str[i] != s2->str[i]) {
139: return i;
140: }
141: }
142: return m;
143: }
144:
145:
146:
147:
148:
149:
150:
151: static int
152: output_diff(xstr *p, xstr *c, int encoding)
153: {
154: int i, m, len = 1;
155: m = common_len(p, c);
156: if (p && p->len > m) {
157: fprintf(page_out, "%c", p->len - m + 1);
158: } else {
159: fprintf(page_out, "%c", 1);
160: }
161: for (i = m; i < c-> len; i++) {
162: char buf[8];
163: len += anthy_sputxchar(buf, c->str[i], encoding);
164: fputs(buf, page_out);
165: }
166: return len;
167: }
168:
169: static void
170: begin_new_page(int i)
171: {
172: fputc(0, page_out);
173: write_nl(page_index_out, i);
174: }
175:
176: static void
177: output_entry_index(int i)
178: {
179: write_nl(yomi_entry_index_out, i);
180: }
181:
182:
183:
184:
185: static void
186: generate_yomi_to_offset_map(struct yomi_entry_list *yl)
187: {
188: int i;
189: struct yomi_entry *ye = NULL;
190: xstr *prev = NULL;
191: int page_index = 0;
192:
193:
194:
195: write_nl(page_index_out, page_index);
196:
197: for (i = 0; i < yl->nr_valid_entries; i++) {
198: ye = yl->ye_array[i];
199:
200: if ((i % WORDS_PER_PAGE) == 0 && (i != 0)) {
201: page_index ++;
202: prev = NULL;
203: begin_new_page(page_index);
204: }
205:
206:
207: page_index += output_diff(prev, ye->index_xstr, yl->index_encoding);
208:
209: output_entry_index(ye->offset);
210:
211: prev = ye->index_xstr;
212: }
213: }
214:
215:
216:
217: void
218: output_word_dict(struct yomi_entry_list *yl)
219: {
220: int entry_index = 0;
221: int i;
222: struct yomi_entry *ye = NULL;
223:
224:
225: for (i = 0; i < yl->nr_valid_entries; i++) {
226:
227: ye = yl->ye_array[i];
228: ye->offset = entry_index;
229: entry_index += output_word_entry_for_a_yomi(ye, yl->body_encoding);
230: }
231:
232: generate_yomi_to_offset_map(yl);
233:
234:
235: entry_index += output_word_entry_for_a_yomi(ye, yl->body_encoding);
236: write_nl(yomi_entry_index_out, entry_index);
237: write_nl(page_index_out, 0);
238:
239:
240: printf("Total %d indexes, %d words, (%d pages).\n",
241: yl->nr_valid_entries,
242: yl->nr_words,
243: yl->nr_valid_entries / WORDS_PER_PAGE + 1);
244: }