1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38: #include <sys/types.h>
39: #include <sys/stat.h>
40: #include <errno.h>
41: #include <unistd.h>
42: #include <string.h>
43: #include <stdio.h>
44: #include <stdlib.h>
45:
46: #include "config.h"
47: #include <anthy/anthy.h>
48: #include <anthy/dic.h>
49: #include <anthy/alloc.h>
50: #include <anthy/conf.h>
51: #include <anthy/ruleparser.h>
52: #include <anthy/record.h>
53: #include <anthy/logger.h>
54: #include <anthy/prediction.h>
55:
56: #include "dic_main.h"
57: #include "dic_personality.h"
58:
59:
60: #define ENCODING_SUFFIX ".utf8"
61:
62:
63: enum val_type {
64: RT_EMPTY, RT_VAL, RT_XSTR, RT_XSTRP
65: };
66:
67:
68: struct record_val {
69: enum val_type type;
70: union {
71: xstr str;
72: int val;
73: xstr* strp;
74: } u;
75: };
76:
77:
78: struct record_row {
79: xstr key;
80: int nr_vals;
81: struct record_val *vals;
82: };
83:
84:
85: struct trie_node {
86: struct trie_node *l;
87: struct trie_node *r;
88: int bit;
89: struct record_row row;
90: struct trie_node *lru_prev, *lru_next;
91: int dirty;
92: };
93:
94:
95: struct trie_root {
96: struct trie_node root;
97: allocator node_ator;
98: };
99:
100: #define LRU_USED 0x01
101: #define LRU_SUSED 0x02
102: #define PROTECT 0x04
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
122:
123:
124:
125:
126:
127:
128:
129: struct record_section {
130: const char *name;
131: struct trie_root cols;
132: struct record_section *next;
133: int lru_nr_used, lru_nr_sused;
134: };
135:
136:
137: struct record_stat {
138: struct record_section section_list;
139: struct record_section *cur_section;
140: struct trie_root xstrs;
141: struct trie_node *cur_row;
142: int row_dirty;
143: int encoding;
144:
145: int is_anon;
146: const char *id;
147: char *base_fn;
148: char *journal_fn;
149:
150: time_t base_timestamp;
151: int last_update;
152: time_t journal_timestamp;
153: };
154:
155:
156: #define FILE2_LIMIT 102400
157:
158:
159:
160:
161:
162:
163:
164:
165:
166:
167:
168:
169:
170:
171:
172:
173:
174:
175:
176:
177:
178:
179:
180:
181:
182:
183:
184:
185:
186:
187:
188:
189:
190:
191:
192:
193:
194:
195:
196:
197:
198:
199:
200:
201:
202:
203:
204:
205:
206:
207:
208:
209:
210:
211:
212:
213:
214:
215:
216:
217:
218:
219:
220:
221:
222:
223:
224:
225:
226:
227:
228:
229:
230:
231:
232:
233:
234:
235:
236:
237:
238:
239:
240:
241:
242:
243:
244:
245: static allocator record_ator;
246:
247:
248: static void init_trie_root(struct trie_root *n);
249: static int trie_key_nth_bit(xstr* key, int n);
250: static int trie_key_first_diff_bit_1byte(xchar c1, xchar c2);
251: static int trie_key_first_diff_bit(xstr *k1, xstr *k2);
252: static int trie_key_cmp(xstr *k1, xstr *k2);
253: static void trie_key_dup(xstr *dst, xstr *src);
254: static void trie_row_init(struct record_row *rc);
255: static void trie_row_free(struct record_row *rc);
256: static struct trie_node *trie_find(struct trie_root *root, xstr *key);
257: static struct trie_node *trie_insert(struct trie_root *root, xstr *key,
258: int dirty, int *nr_used, int *nr_sused);
259: static void trie_remove(struct trie_root *root, xstr *key,
260: int *nr_used, int *nr_sused);
261: static struct trie_node *trie_first(struct trie_root *root);
262: static struct trie_node *trie_next(struct trie_root *root,
263: struct trie_node *cur);
264: static void trie_remove_all(struct trie_root *root,
265: int *nr_used, int *nr_sused);
266: static void trie_remove_old(struct trie_root *root, int count,
267: int* nr_used, int* nr_sused);
268: static void trie_mark_used(struct trie_root *root, struct trie_node *n,
269: int *nr_used, int *nr_sused);
270:
271:
272:
273:
274:
275:
276:
277:
278: #define PUTNODE(x) ((x) == &root->root ? printf("root\n") : anthy_putxstrln(&(x)->row.key))
279: static int
280: debug_trie_dump(FILE* fp, struct trie_node* n, int encoding)
281: {
282: int cnt = 0;
283: char buf[1024];
284:
285: if (n->l->bit > n->bit) {
286: cnt = debug_trie_dump(fp, n->l, encoding);
287: } else {
288: if (n->l->row.key.len == -1) {
289: if (fp) {
290: fprintf(fp, "root\n");
291: }
292: } else {
293: if (fp) {
294: anthy_sputxstr(buf, &n->l->row.key, encoding);
295: fprintf(fp, "%s\n", buf);
296: }
297: cnt = 1;
298: }
299: }
300:
301: if (n->r->bit > n->bit) {
302: return cnt + debug_trie_dump(fp, n->r, encoding);
303: } else {
304: if (n->r->row.key.len == -1) {
305: if(fp) {
306: fprintf(fp, "root\n");
307: }
308: } else {
309: if(fp) {
310: anthy_sputxstr(buf, &n->r->row.key, encoding);
311: fprintf(fp, "%s\n", buf);
312: }
313: return cnt + 1;
314: }
315: }
316:
317: return cnt;
318: }
319:
320: static void
321: init_trie_root(struct trie_root *root)
322: {
323: struct trie_node* n;
324: root->node_ator = anthy_create_allocator(sizeof(struct trie_node), NULL);
325: n = &root->root;
326: n->l = n;
327: n->r = n;
328: n->bit = 0;
329: n->lru_next = n;
330: n->lru_prev = n;
331: n->dirty = 0;
332: trie_row_init(&n->row);
333: n->row.key.len = -1;
334: }
335:
336:
337:
338:
339:
340:
341:
342:
343:
344: static int
345: trie_key_nth_bit(xstr* key, int n)
346: {
347: switch (n) {
348: case 0:
349: return 0;
350: case 1:
351: return key->len + 1;
352: default:
353: {
354: int pos;
355: n -= 2;
356: pos = n / (sizeof(xchar) << 3);
357: if (pos >= key->len) {
358: return 0;
359: }
360: return key->str[pos] & (1 << (n % (sizeof(xchar) << 3)));
361: }
362: }
363: }
364:
365:
366: static int
367: trie_key_first_diff_bit_1byte(xchar c1, xchar c2)
368: {
369: int i;
370: int ptn;
371: for (i = 0, ptn = c1 ^ c2; !(ptn & 1); i++, ptn >>= 1 )
372: ;
373: return i;
374: }
375:
376:
377:
378:
379:
380: #define MIN(a,b) ((a)<(b)?(a):(b))
381: static int
382: trie_key_first_diff_bit(xstr *k1, xstr *k2)
383: {
384: int len;
385: int i;
386:
387: len = MIN(k1->len, k2->len);
388: if (len == -1) {
389: return 1;
390: }
391: for ( i = 0 ; i < len ; i++ ){
392: if (k1->str[i] != k2->str[i]) {
393: return (2 + (i * (sizeof(xchar) << 3)) +
394: trie_key_first_diff_bit_1byte(k1->str[i], k2->str[i]));
395: }
396: }
397: if (k1->len < k2->len) {
398: return (2 + (i * (sizeof(xchar) << 3)) +
399: trie_key_first_diff_bit_1byte(0, k2->str[i]));
400: } else {
401: return (2 + (i * (sizeof(xchar) << 3)) +
402: trie_key_first_diff_bit_1byte(k1->str[i], 0));
403: }
404: }
405: #undef MIN
406:
407: static int
408: trie_key_cmp(xstr *k1, xstr *k2)
409: {
410: if (k1->len == -1 || k2->len == -1) {
411: return k1->len - k2->len;
412: }
413: return anthy_xstrcmp(k1, k2);
414: }
415:
416: static void
417: trie_key_dup(xstr *dst, xstr *src)
418: {
419: dst->str = anthy_xstr_dup_str(src);
420: dst->len = src->len;
421: }
422:
423:
424:
425:
426: static struct trie_node *
427: trie_find(struct trie_root *root, xstr *key)
428: {
429: struct trie_node *p;
430: struct trie_node *q;
431:
432: p = &root->root;
433: q = p->l;
434: while (p->bit < q->bit) {
435: p = q;
436: q = trie_key_nth_bit(key, p->bit) ? p->r : p->l;
437: }
438: return trie_key_cmp(&q->row.key,key) ? NULL : q;
439: }
440:
441:
442:
443:
444:
445: static struct trie_node *
446: trie_find_longest (struct trie_root* root, xstr *key)
447: {
448: struct trie_node *p;
449: struct trie_node *q;
450:
451: p = &root->root;
452: q = p->l;
453: while (p->bit < q->bit) {
454: p = q;
455: q = trie_key_nth_bit(key, p->bit) ? p->r : p->l;
456: }
457:
458: return q;
459: }
460:
461:
462:
463:
464:
465: static struct trie_node *
466: trie_insert(struct trie_root *root, xstr *key,
467: int dirty, int *nr_used, int *nr_sused)
468: {
469: struct trie_node *n;
470: struct trie_node *p;
471: struct trie_node *q;
472: int i;
473:
474: p = &root->root;
475: q = p->l;
476: while (p->bit < q->bit) {
477: p = q;
478: q = trie_key_nth_bit(key, p->bit) ? p->r : p->l;
479: }
480: if (trie_key_cmp(&q->row.key,key) == 0) {
481:
482: if (dirty == LRU_USED) {
483: trie_mark_used(root, q, nr_used, nr_sused);
484: } else if (q->dirty == 0) {
485: q->dirty = dirty;
486: }
487: return 0;
488: }
489: i = trie_key_first_diff_bit(&q->row.key, key);
490: p = &root->root;
491: q = p->l;
492: while (p->bit < q->bit && i > q->bit) {
493: p = q;
494: q = trie_key_nth_bit(key, p->bit) ? p->r : p->l;
495: }
496: n = anthy_smalloc(root->node_ator);
497: trie_row_init(&n->row);
498: trie_key_dup(&n->row.key, key);
499: n->bit = i;
500: if (trie_key_nth_bit(key, i)) {
501: n->l = q;
502: n->r = n;
503: } else {
504: n->l = n;
505: n->r = q;
506: }
507: if (p->l == q) {
508: p->l = n;
509: } else {
510: p->r = n;
511: }
512:
513:
514: if (dirty == LRU_USED) {
515: root->root.lru_next->lru_prev = n;
516: n->lru_prev = &root->root;
517: n->lru_next = root->root.lru_next;
518: root->root.lru_next = n;
519: (*nr_used)++;
520: } else {
521: root->root.lru_prev->lru_next = n;
522: n->lru_next = &root->root;
523: n->lru_prev = root->root.lru_prev;
524: root->root.lru_prev = n;
525: if (dirty == LRU_SUSED) {
526: (*nr_sused)++;
527: }
528: }
529: n->dirty = dirty;
530: return n;
531: }
532:
533:
534:
535:
536:
537:
538:
539:
540:
541:
542:
543:
544:
545:
546: static void
547: trie_remove(struct trie_root *root, xstr *key,
548: int *nr_used, int *nr_sused)
549: {
550: struct trie_node *p;
551: struct trie_node *q;
552: struct trie_node **pp = NULL;