1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22: #include <stdio.h>
23: #include <stdlib.h>
24: #include <string.h>
25:
26: #include "config.h"
27: #include <anthy/anthy.h>
28:
29: #include <anthy/conf.h>
30: #include <anthy/ruleparser.h>
31: #include <anthy/xstr.h>
32: #include <anthy/filemap.h>
33: #include <anthy/logger.h>
34: #include <anthy/segclass.h>
35: #include <anthy/splitter.h>
36: #include <anthy/wtype.h>
37: #include <anthy/diclib.h>
38: #include "wordborder.h"
39:
40:
/* Dependent-word dictionary state used by every function in this file.
 * It is filled in by read_file() and released by anthy_quit_depword_tab(). */
static struct dep_dic ddic;


/* match_nodes() and match_branch() call each other, so both are declared
 * here ahead of their definitions. */
static void
match_branch(struct splitter_context *sc,
	     struct word_list *tmpl,
	     xstr *xs, struct dep_branch *db);
static void
match_nodes(struct splitter_context *sc,
	    struct word_list *wl,
	    xstr follow_str, int node);
52:
53:
54: static int
55: anthy_xstrcmp_with_ondisk(xstr *xs,
56: ondisk_xstr *dxs)
57: {
58: int *d = (int *)dxs;
59: int len = anthy_dic_ntohl(d[0]);
60: int i;
61: xchar c;
62: if (len != xs->len) {
63: return 1;
64: }
65: d++;
66: for (i = 0; i < len; i++) {
67: c = anthy_dic_ntohl(d[i]);
68: if (xs->str[i] != c) {
69: return 1;
70: }
71: }
72: return 0;
73: }
74:
75: static ondisk_xstr *
76: anthy_next_ondisk_xstr(ondisk_xstr *dxs)
77: {
78: int *d = (int *)dxs;
79: int len = anthy_dic_ntohl(d[0]);
80: return &d[len+1];
81: }
82:
83: static int
84: anthy_ondisk_xstr_len(ondisk_xstr *dxs)
85: {
86: int *d = (int *)dxs;
87: return anthy_dic_ntohl(d[0]);
88: }
89:
90:
91:
92:
93:
94:
95:
96:
97: static void
98: match_nodes(struct splitter_context *sc,
99: struct word_list *wl,
100: xstr follow_str, int node)
101: {
102: struct dep_node *dn = &ddic.nodes[node];
103: struct dep_branch *db;
104: int i,j;
105:
106:
107: for (i = 0; i < dn->nr_branch; i++) {
108: ondisk_xstr *dep_xs;
109: db = &dn->branch[i];
110: dep_xs = db->xstrs;
111:
112:
113: for (j = 0; j < db->nr_strs;
114: j++, dep_xs = anthy_next_ondisk_xstr(dep_xs)) {
115: xstr cond_xs;
116:
117: if (follow_str.len < anthy_ondisk_xstr_len(dep_xs)) {
118: continue;
119: }
120:
121: cond_xs.str = follow_str.str;
122: cond_xs.len = anthy_ondisk_xstr_len(dep_xs);
123:
124:
125: if (!anthy_xstrcmp_with_ondisk(&cond_xs, dep_xs)) {
126:
127: struct word_list new_wl = *wl;
128: struct part_info *part = &new_wl.part[PART_DEPWORD];
129: xstr new_follow;
130:
131: part->len += cond_xs.len;
132: new_follow.str = &follow_str.str[cond_xs.len];
133: new_follow.len = follow_str.len - cond_xs.len;
134:
135: match_branch(sc, &new_wl, &new_follow, db);
136: }
137: }
138: }
139: }
140:
141:
142:
143:
144:
145:
146:
147:
148: static void
149: match_branch(struct splitter_context *sc,
150: struct word_list *tmpl,
151: xstr *xs, struct dep_branch *db)
152: {
153: struct part_info *part = &tmpl->part[PART_DEPWORD];
154: int i;
155:
156:
157: for (i = 0; i < db->nr_transitions; i++) {
158:
159: int head_pos = tmpl->head_pos;
160: int features = tmpl->mw_features;
161: enum dep_class dc = part->dc;
162:
163: struct dep_transition *transition = &db->transition[i];
164:
165: tmpl->tail_ct = anthy_dic_ntohl(transition->ct);
166:
167: if (anthy_dic_ntohl(transition->dc) != DEP_NONE) {
168: part->dc = anthy_dic_ntohl(transition->dc);
169: }
170:
171: if (anthy_dic_ntohl(transition->head_pos) != POS_NONE) {
172: tmpl->head_pos = anthy_dic_ntohl(transition->head_pos);
173: }
174: if (transition->weak) {
175: tmpl->mw_features |= MW_FEATURE_WEAK_CONN;
176: }
177:
178:
179: if (anthy_dic_ntohl(transition->next_node)) {
180:
181: match_nodes(sc, tmpl, *xs, anthy_dic_ntohl(transition->next_node));
182: } else {
183: struct word_list *wl;
184:
185:
186:
187:
188:
189: wl = anthy_alloc_word_list(sc);
190: *wl = *tmpl;
191: wl->len += part->len;
192:
193:
194: anthy_commit_word_list(sc, wl);
195: }
196:
197: part->dc = dc;
198: tmpl->head_pos = head_pos;
199: tmpl->mw_features = features;
200: }
201: }
202:
203:
204:
205: void
206: anthy_scan_node(struct splitter_context *sc,
207: struct word_list *tmpl,
208: xstr *follow, int node)
209: {
210:
211: match_nodes(sc, tmpl, *follow, node);
212: }
213:
214:
215:
216:
217: static void
218: read_xstr(struct dep_dic* ddic, int* offset)
219: {
220: int len = anthy_dic_ntohl(*(int*)&ddic->file_ptr[*offset]);
221: *offset += sizeof(int);
222: *offset += sizeof(xchar) * len;
223: }
224:
225: static void
226: read_branch(struct dep_dic* ddic, struct dep_branch* branch, int* offset)
227: {
228: int i;
229:
230:
231: branch->nr_strs = anthy_dic_ntohl(*(int*)&ddic->file_ptr[*offset]);
232: *offset += sizeof(int);
233:
234: branch->xstrs = (ondisk_xstr *)&ddic->file_ptr[*offset];
235:
236: for (i = 0; i < branch->nr_strs; ++i) {
237: read_xstr(ddic, offset);
238: }
239:
240: branch->nr_transitions = anthy_dic_ntohl(*(int*)&ddic->file_ptr[*offset]);
241: *offset += sizeof(int);
242: branch->transition = (struct dep_transition*)&ddic->file_ptr[*offset];
243: *offset += sizeof(struct dep_transition) * branch->nr_transitions;
244: }
245:
246: static void
247: read_node(struct dep_dic* ddic, struct dep_node* node, int* offset)
248: {
249: int i;
250: node->nr_branch = anthy_dic_ntohl(*(int*)&ddic->file_ptr[*offset]);
251: *offset += sizeof(int);
252:
253: node->branch = malloc(sizeof(struct dep_branch) * node->nr_branch);
254: for (i = 0; i < node->nr_branch; ++i) {
255: read_branch(ddic, &node->branch[i], offset);
256: }
257: }
258:
259: static void
260: read_file(void)
261: {
262: int i;
263:
264: int offset = 0;
265:
266: ddic.file_ptr = anthy_file_dic_get_section("dep_dic");
267:
268:
269: ddic.nrRules = anthy_dic_ntohl(*(int*)&ddic.file_ptr[offset]);
270: offset += sizeof(int);
271:
272:
273: ddic.rules = (struct ondisk_wordseq_rule*)&ddic.file_ptr[offset];
274: offset += sizeof(struct ondisk_wordseq_rule) * ddic.nrRules;
275:
276: ddic.nrNodes = anthy_dic_ntohl(*(int*)&ddic.file_ptr[offset]);
277: offset += sizeof(int);
278:
279:
280: ddic.nodes = malloc(sizeof(struct dep_node) * ddic.nrNodes);
281: for (i = 0; i < ddic.nrNodes; ++i) {
282: read_node(&ddic, &ddic.nodes[i], &offset);
283: }
284: }
285:
286: int
287: anthy_get_nr_dep_rule()
288: {
289: return ddic.nrRules;
290: }
291:
292: void
293: anthy_get_nth_dep_rule(int index, struct wordseq_rule *rule)
294: {
295:
296: struct ondisk_wordseq_rule *r = &ddic.rules[index];
297: rule->wt = anthy_get_wtype(r->wt[0], r->wt[1], r->wt[2],
298: r->wt[3], r->wt[4], r->wt[5]);
299: rule->node_id = anthy_dic_ntohl(r->node_id);
300: }
301:
302: int
303: anthy_init_depword_tab()
304: {
305: read_file();
306: return 0;
307: }
308:
309: void
310: anthy_quit_depword_tab(void)
311: {
312: int i;
313: for (i = 0; i < ddic.nrNodes; i++) {
314: struct dep_node* node = &ddic.nodes[i];
315: free(node->branch);
316: }
317: free(ddic.nodes);
318: }
319: