1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52: #include <unistd.h>
53: #include <sys/types.h>
54: #include <sys/stat.h>
55: #include <fcntl.h>
56:
57: #include <stdlib.h>
58: #include <stdio.h>
59: #include <string.h>
60: #include <ctype.h>
61: #include <anthy/texttrie.h>
62: #include <anthy/filemap.h>
63: #include "dic_main.h"
64:
65:
66: #define OBJ_LEN 20
67: #define LINE_LEN 32
68: #define EXPAND_COUNT 16
69:
70:
71: #define TT_SUPER 0
72: #define TT_UNUSED 1
73: #define TT_ALLOCED 2
74: #define TT_NODE 3
75: #define TT_BODY 4
76: #define TT_TAIL 5
77:
78:
79: struct cell {
80:
81: int type;
82:
83: union {
84:
85: int next_unused;
86:
87: struct {
88: int first_unused;
89: int root_cell;
90: int size;
91: int serial;
92: } super;
93:
94: struct {
95: int key;
96: int next;
97: int child;
98: int body;
99: int parent;
100: } node;
101:
102: struct {
103: int owner;
104: char *obj;
105: } body;
106:
107: struct {
108: char *obj;
109: int prev;
110: } tail;
111: } u;
112:
113: int next_tail;
114: };
115:
116: struct text_trie {
117:
118: int fatal;
119:
120: char *fn;
121: FILE *wfp;
122: struct filemapping *mapping;
123: char *ptr;
124:
125: struct cell super;
126: int valid_super;
127: };
128:
129: struct path {
130:
131: const char *key_str;
132:
133: int max_len;
134: int *path;
135: int len;
136: int cur;
137: };
138:
139: static void
140: print_super_cell(struct cell *c)
141: {
142: printf("super first_unused=%d, root_cell=%d, size=%d, serial=%d\n",
143: c->u.super.first_unused, c->u.super.root_cell,
144: c->u.super.size, c->u.super.serial);
145: }
146:
147: static void
148: print_alloced_cell(void)
149: {
150: printf("alloc-ed\n");
151: }
152:
153: static void
154: print_node_cell(struct cell *c)
155: {
156: printf("node key=%d", c->u.node.key);
157: if (c->u.node.key > 0 && isprint(c->u.node.key)) {
158: printf("(%c)", c->u.node.key);
159: }
160: printf(" parent=%d next=%d child=%d body=%d\n",
161: c->u.node.parent, c->u.node.next, c->u.node.child, c->u.node.body);
162: }
163:
164: static void
165: print_unused_cell(struct cell *c)
166: {
167: printf("unused next_unused=%d\n",
168: c->u.next_unused);
169: }
170:
171: static void
172: print_body_cell(struct cell *c)
173: {
174: printf("body object=(%s), owner=%d, next_tail=%d\n",
175: c->u.body.obj, c->u.body.owner, c->next_tail);
176: }
177:
178: static void
179: print_tail_cell(struct cell *c)
180: {
181: printf("tail object=(%s), prev=%d, next_tail=%d\n",
182: c->u.tail.obj, c->u.tail.prev, c->next_tail);
183: }
184:
185: static void
186: print_cell(int idx, struct cell *c)
187: {
188: if (!c) {
189: printf("idx =%d(null cell):\n", idx);
190: return ;
191: }
192: printf("idx=%d:", idx);
193: switch (c->type) {
194: case TT_SUPER:
195: print_super_cell(c);
196: break;
197: case TT_ALLOCED:
198: print_alloced_cell();
199: break;
200: case TT_NODE:
201: print_node_cell(c);
202: break;
203: case TT_UNUSED:
204: print_unused_cell(c);
205: break;
206: case TT_BODY:
207: print_body_cell(c);
208: break;
209: case TT_TAIL:
210: print_tail_cell(c);
211: break;
212: default:
213: printf("unknown\n");
214: }
215: }
216:
217: static void
218: path_setup(struct path *path, const char *key, int len, int *buf)
219: {
220: unsigned char *p = (unsigned char *)key;
221: path->key_str = key;
222: path->max_len = len;
223: path->path = buf;
224: path->len = 0;
225: path->cur = 0;
226:
227: while (*p) {
228: path->path[path->len] = p[0] * 256 + p[1];
229: path->len ++;
230: p++;
231: if (p[0]) {
232: p++;
233: }
234: }
235: }
236:
237: static void
238: path_copy_to_str(struct path *path, char *str, int buf_len)
239: {
240: unsigned char *p = (unsigned char *)str;
241: int i, o;
242: for (i = 0, o = 0; i < path->cur && o < buf_len - 2; i++) {
243: p[o] = (path->path[i]>>8)&255;
244: p[o+1] = path->path[i]&255;
245: o += 2;
246: }
247: p[o] = 0;
248: }
249:
250: static int
251: sput_int(char *buf, int num)
252: {
253: unsigned char *tmp = (unsigned char *)buf;
254: tmp[0] = (num>>24)&255;
255: tmp[1] = (num>>16)&255;
256: tmp[2] = (num>>8)&255;
257: tmp[3] = num&255;
258: return 4;
259: }
260:
261: static char *
262: sget_int(char *buf, int *num)
263: {
264: unsigned int res;
265: unsigned char *tmp = (unsigned char *)buf;
266: res = 0;
267: res += tmp[0] << 24;
268: res += tmp[1] << 16;
269: res += tmp[2] << 8;
270: res += tmp[3];
271: *num = res;
272: buf += 4;
273: return buf;
274: }
275:
276: static char *
277: pass_str(char *buf, const char *str)
278: {
279: buf += strlen(str);
280: return buf;
281: }
282:
283: static void
284: encode_super(struct cell *c, char *buf)
285: {
286: buf += sprintf(buf, "S ");
287: buf += sput_int(buf, c->u.super.size);
288: buf += sput_int(buf, c->u.super.root_cell);
289: buf += sput_int(buf, c->u.super.first_unused);
290: buf += sput_int(buf, c->u.super.serial);
291: buf += sput_int(buf, LINE_LEN);
292: }
293:
294: static void
295: encode_node(struct cell *c, char *buf)
296: {
297: buf += sprintf(buf, "N ");
298: buf += sput_int(buf, c->u.node.key);
299: buf += sput_int(buf, c->u.node.parent);
300: buf += sput_int(buf, c->u.node.next);
301: buf += sput_int(buf, c->u.node.child);
302: buf += sput_int(buf, c->u.node.body);
303: }
304:
305: static void
306: encode_body(struct cell *c, char *buf)
307: {
308: buf += sprintf(buf, "B");
309: buf += sput_int(buf, c->next_tail);
310: buf += sput_int(buf, c->u.body.owner);
311: sprintf(buf, "\"%s\"",
312: c->u.body.obj);
313: }
314:
315: static void
316: encode_unused(struct cell *c, char *buf)
317: {
318: buf += sprintf(buf, "-next=");
319: buf += sput_int(buf, c->u.next_unused);
320: }
321:
322: static void
323: encode_tail(struct cell *c, char *buf)
324: {
325: buf += sprintf(buf, "T");
326: buf += sput_int(buf, c->u.tail.prev);
327: buf += sput_int(buf, c->next_tail);
328: sprintf(buf, "\"%s\"",
329: c->u.tail.obj);
330: }
331:
332: static void
333: encode_unknown(char *buf)
334: {
335: sprintf(buf, "?");
336: }
337:
338: static void
339: encode_cell(struct cell *c, char *buf)
340: {
341: switch (c->type) {
342: case TT_SUPER:
343: encode_super(c, buf);
344: break;
345: case TT_NODE:
346: encode_node(c, buf);
347: break;
348: case TT_UNUSED:
349: encode_unused(c, buf);
350: break;
351: case TT_BODY:
352: encode_body(c, buf);
353: break;
354: case TT_TAIL:
355: encode_tail(c, buf);
356: break;
357: default:
358: encode_unknown(buf);
359: break;
360: }
361: }
362:
363: static void
364: write_back_cell(struct text_trie *tt, struct cell *c, int idx)
365: {
366: int i;
367: char buf[LINE_LEN+1];
368:
369: if (((anthy_mmap_size(tt->mapping) / LINE_LEN) < (idx + 1)) ||
370: idx < 0) {
371: return ;
372: }
373: for (i = 0; i < LINE_LEN; i++) {
374: buf[i] = ' ';
375: }
376: encode_cell(c, buf);
377: buf[LINE_LEN-1] = '\n';
378: if (anthy_mmap_is_writable(tt->mapping)) {
379: memcpy(&tt->ptr[idx*LINE_LEN], buf, LINE_LEN);
380: } else {
381: fseek(tt->wfp, idx*LINE_LEN, SEEK_SET);
382: fwrite(buf, LINE_LEN, 1, tt->wfp);
383: fflush(tt->wfp);
384: }
385: }
386:
387: static char *
388: decode_str(char *raw_buf, int off)
389: {
390: char *head;
391: char copy_buf[LINE_LEN + 1];
392: char *buf;
393: int i;
394:
395: for (i = 0; i < LINE_LEN - off - 1; i++) {
396: copy_buf[i] = raw_buf[i];
397: }
398: copy_buf[i] = 0;
399: buf = copy_buf;
400:
401: while (*buf && *buf != '\"') {
402: buf ++;
403: }
404: if (!*buf) {
405:
406: return strdup("");
407: }
408: buf ++;
409: head = buf;
410:
411: while (*buf) {
412: buf ++;
413: }
414:
415: while (*buf != '\"') {
416: buf--;
417: }
418: *buf = 0;
419:
420: return strdup(head);
421: }
422:
423: static void
424: release_cell_str(struct cell *c)
425: {
426: if (!c) {
427: return ;
428: }
429: if (c->type == TT_BODY) {
430: free(c->u.body.obj);
431: }
432: if (c->type == TT_TAIL) {
433: free(c->u.tail.obj);
434: }
435: }
436:
437: static int
438: decode_super(struct cell *c, char *buf)
439: {
440: c->type = TT_SUPER;
441: buf = pass_str(buf, "S ");
442: buf = sget_int(buf, &c->u.super.size);
443: buf = sget_int(buf, &c->u.super.root_cell);
444: buf = sget_int(buf, &c->u.super.first_unused);
445: buf = sget_int(buf, &c->u.super.serial);
446: return 0;
447: }
448:
449: static int
450: decode_unuse(struct cell *c, char *buf)
451: {
452: c->type = TT_UNUSED;
453: buf = pass_str(buf, "-next=");
454: buf = sget_int(buf, &c->u.next_unused);
455: return 0;
456: }
457:
458: static int
459: decode_node(struct cell *c, char *buf)
460: {
461: c->type = TT_NODE;
462: buf = pass_str(buf, "N ");
463: buf = sget_int(buf, &c->u.node.key);
464: buf = sget_int(buf, &c->u.node.parent);
465: buf = sget_int(buf, &c->u.node.next);
466: buf = sget_int(buf, &c->u.node.child);
467: buf = sget_int(buf, &c->u.node.body);
468: return 0;
469: }
470:
471: static int
472: decode_body(struct cell *c, char *buf)
473: {
474: c->type = TT_BODY;
475: buf = pass_str(buf, "B");
476: buf = sget_int(buf, &c->next_tail);
477: buf = sget_int(buf, &c->u.body.owner);
478: c->u.body.obj = decode_str(buf, 9);
479: return 0;
480: }
481:
482: static int
483: decode_tail(struct cell *c, char *buf)
484: {
485: c->type = TT_TAIL;
486: buf = pass_str(buf, "T");
487: buf = sget_int(buf, &c->u.tail.prev);
488: buf = sget_int(buf, &c->next_tail);
489: c->u.tail.obj = decode_str(buf, 9);
490: return 0;
491: }
492:
493: static int
494: decode_alloced(struct cell *c)
495: {
496: c->type = TT_ALLOCED;
497: return 0;
498: }
499:
500: static struct cell *
501: decode_nth_cell(struct text_trie *tt, struct cell *c, int nth)
502: {
503: int res;
504: char *buf;
505: if (nth < 0 ||
506: (anthy_mmap_size(tt->mapping) / LINE_LEN) <
507: (nth + 1)) {
508: return NULL;
509: }
510: buf = &tt->ptr[nth*LINE_LEN];
511:
512: res = -1;
513: switch (buf[0]) {
514: case 'S':
515: res = decode_super(c, buf);
516: break;
517: case '-':
518: res = decode_unuse(c, buf);
519: break;
520: case 'N':
521: res = decode_node(c, buf);
522: break;
523: case 'B':
524: res = decode_body(c, buf);
525: break;
526: case 'T':
527: res = decode_tail(c, buf);
528: break;
529: case '?':
530: res = decode_alloced(c);
531: break;
532: default:
533:
534: ;
535: }
536: if (res) {
537: c->type = TT_UNUSED;
538: }
539: return c;
540: }
541:
542: static struct cell *
543: decode_nth_node(struct text_trie *tt, struct cell *c, int nth)
544: {
545: if (!decode_nth_cell(tt, c, nth)) {
546: return NULL;
547: }
548: if (c->type != TT_NODE) {
549: return NULL;
550: }
551: return c;
552: }
553:
554: static int
555: update_mapping(struct text_trie *tt)
556: {
557: if (tt->mapping) {
558: anthy_munmap(tt->mapping);
559: }
560: tt->mapping = anthy_mmap(tt->fn, 1);
561: if (!tt->mapping) {
562:
563: tt->mapping = anthy_mmap(tt->fn, 0);
564: }
565: if (!tt->mapping) {
566: tt->ptr = NULL;
567: return 1;
568: }
569: tt->ptr = anthy_mmap_address(tt->mapping);
570: return 0;
571: }
572:
5