1:
2:
3:
4:
5:
6:
7: #include <stdlib.h>
8: #include <time.h>
9:
10: #include <anthy/ordering.h>
11: #include <anthy/record.h>
12: #include <anthy/splitter.h>
13: #include <anthy/segment.h>
14: #include "sorter.h"
15:
/* Limits for the "OCHAIRE" section. */
/* Entry count handed to anthy_truncate_section() after learning. */
#define MAX_OCHAIRE_ENTRY_COUNT  100
/* Reading strings of this length or longer are never stored or looked up. */
#define MAX_OCHAIRE_LEN          32

/* Entry count handed to anthy_truncate_section() for "PREDICTION". */
#define MAX_PREDICTION_ENTRY     100

/* Entry count handed to anthy_truncate_section() for "UNKNOWN_WORD". */
#define MAX_UNKNOWN_WORD         100
21:
22:
23: static void
24: learn_swapped_candidates(struct segment_list *sl)
25: {
26: int i;
27: struct seg_ent *seg;
28: for (i = 0; i < sl->nr_segments; i++) {
29: seg = anthy_get_nth_segment(sl, i);
30: if (seg->committed != 0) {
31:
32: anthy_swap_cand_ent(seg->cands[0],
33: seg->cands[seg->committed]);
34: }
35: }
36: anthy_cand_swap_ageup();
37: }
38:
39:
40: static void
41: learn_resized_segment(struct splitter_context *sc,
42: struct segment_list *sl)
43:
44: {
45: int i;
46: struct meta_word **mw
47: = alloca(sizeof(struct meta_word*) * sl->nr_segments);
48: int *len_array
49: = alloca(sizeof(int) * sl->nr_segments);
50:
51:
52: for (i = 0; i < sl->nr_segments; i++) {
53: struct seg_ent *se = anthy_get_nth_segment(sl, i);
54: mw[i] = se->cands[se->committed]->mw;
55: len_array[i] = se->str.len;
56: }
57:
58: anthy_commit_border(sc, sl->nr_segments, mw, len_array);
59: }
60:
61:
62: static void
63: clear_resized_segment(struct splitter_context *sc,
64: struct segment_list *sl)
65: {
66: int *mark, i, from;
67: struct seg_ent *seg;
68: mark = alloca(sizeof(int)*sc->char_count);
69: for (i = 0; i < sc->char_count; i++) {
70: mark[i] = 0;
71: }
72:
73: from = 0;
74: for (i = 0; i < sl->nr_segments; i++) {
75: seg = anthy_get_nth_segment(sl, i);
76: mark[from] = seg->len;
77: from = from + seg->len;
78: }
79: for (i = 0; i < sc->char_count; i++) {
80: int len = sc->ce[i].initial_seg_len;
81:
82:
83: if (len && len != mark[i]) {
84: xstr xs;
85: xs.str = sc->ce[i].c;
86: xs.len = len;
87: anthy_forget_unused_unknown_word(&xs);
88: }
89: }
90: if (!anthy_select_section("UNKNOWN_WORD", 0)) {
91: anthy_truncate_section(MAX_UNKNOWN_WORD);
92: }
93: }
94:
95:
96: static void
97: commit_ochaire(struct seg_ent *seg, int count, xstr* xs)
98: {
99: int i;
100: if (xs->len >= MAX_OCHAIRE_LEN) {
101: return ;
102: }
103: if (anthy_select_row(xs, 1)) {
104: return ;
105: }
106: anthy_set_nth_value(0, count);
107: for (i = 0; i < count; i++, seg = seg->next) {
108: anthy_set_nth_value(i * 2 + 1, seg->len);
109: anthy_set_nth_xstr(i * 2 + 2, &seg->cands[seg->committed]->str);
110: }
111: }
112:
113:
114:
115: static void
116: release_negative_ochaire(struct splitter_context *sc,
117: struct segment_list *sl)
118: {
119: int start, len;
120: xstr xs;
121: (void)sl;
122:
123: xs.len = sc->char_count;
124: xs.str = sc->ce[0].c;
125:
126:
127: for (start = 0; start < xs.len; start ++) {
128: for (len = 1; len <= xs.len - start && len < MAX_OCHAIRE_LEN; len ++) {
129: xstr part;
130: part.str = &xs.str[start];
131: part.len = len;
132: if (anthy_select_row(&part, 0) == 0) {
133: anthy_release_row();
134: }
135: }
136: }
137: }
138:
139:
140: static void
141: learn_ochaire(struct splitter_context *sc,
142: struct segment_list *sl)
143: {
144: int i;
145: int count;
146:
147: if (anthy_select_section("OCHAIRE", 1)) {
148: return ;
149: }
150:
151:
152: release_negative_ochaire(sc, sl);
153:
154:
155: for (count = 2; count <= sl->nr_segments && count < 5; count++) {
156:
157:
158: for (i = 0; i <= sl->nr_segments - count; i++) {
159: struct seg_ent *head = anthy_get_nth_segment(sl, i);
160: struct seg_ent *s;
161: xstr xs;
162: int j;
163: xs = head->str;
164: if (xs.len < 2 && count < 3) {
165:
166:
167: continue;
168: }
169:
170: for (j = 1, s = head->next; j < count; j++, s = s->next) {
171: xs.len += s->str.len;
172: }
173:
174: commit_ochaire(head, count, &xs);
175: }
176: }
177: if (anthy_select_section("OCHAIRE", 1)) {
178: return ;
179: }
180: anthy_truncate_section(MAX_OCHAIRE_ENTRY_COUNT);
181: }
182:
183: static int
184: learn_prediction_str(xstr *idx, xstr *xs)
185: {
186: int nr_predictions;
187: int i;
188: time_t t = time(NULL);
189: if (anthy_select_row(idx, 1)) {
190: return 0;
191: }
192: nr_predictions = anthy_get_nr_values();
193:
194:
195: for (i = 0; i < nr_predictions; i += 2) {
196: xstr *log = anthy_get_nth_xstr(i + 1);
197: if (!log) {
198: continue;
199: }
200: if (anthy_xstrcmp(log, xs) == 0) {
201: anthy_set_nth_value(i, t);
202: break;
203: }
204: }
205:
206:
207: if (i == nr_predictions) {
208: anthy_set_nth_value(nr_predictions, t);
209: anthy_set_nth_xstr(nr_predictions + 1, xs);
210: anthy_mark_row_used();
211: return 1;
212: }
213: anthy_mark_row_used();
214: return 0;
215: }
216:
217: static void
218: learn_prediction(struct segment_list *sl)
219: {
220: int i;
221: int added = 0;
222: if (anthy_select_section("PREDICTION", 1)) {
223: return ;
224: }
225: for (i = 0; i < sl->nr_segments; i++) {
226: struct seg_ent *seg = anthy_get_nth_segment(sl, i);
227: xstr *xs = &seg->cands[seg->committed]->str;
228:
229: if (seg->committed < 0) {
230: continue;
231: }
232: if (learn_prediction_str(&seg->str, xs)) {
233: added = 1;
234: }
235: }
236: if (added) {
237: anthy_truncate_section(MAX_PREDICTION_ENTRY);
238: }
239: }
240:
241: static void
242: learn_unknown(struct segment_list *sl)
243: {
244: int i;
245: for (i = 0; i < sl->nr_segments; i++) {
246: struct seg_ent *seg = anthy_get_nth_segment(sl, i);
247: struct cand_ent *ce = seg->cands[seg->committed];
248: if (ce->nr_words == 0) {
249: anthy_add_unknown_word(&seg->str, &ce->str);
250: }
251: }
252: }
253:
254: void
255: anthy_do_commit_prediction(xstr *src, xstr *xs)
256: {
257: if (anthy_select_section("PREDICTION", 1)) {
258: return ;
259: }
260: learn_prediction_str(src, xs);
261: }
262:
/*
 * Run every learning step for a committed conversion, in fixed order.
 *
 * sl: the segment list being committed.
 * sc: splitter context of the same conversion.
 */
void
anthy_proc_commit(struct segment_list *sl, struct splitter_context *sc)
{
  /* Candidate order within each segment. */
  learn_swapped_candidates(sl);

  /* Segment borders: report the committed ones, then drop stale entries. */
  learn_resized_segment(sc, sl);
  clear_resized_segment(sc, sl);

  /* Runs of consecutive segments ("OCHAIRE" section). */
  learn_ochaire(sc, sl);

  /* Per-segment prediction entries ("PREDICTION" section). */
  learn_prediction(sl);

  /* Candidates that contain no words. */
  learn_unknown(sl);

  /* Candidate history. */
  anthy_learn_cand_history(sl);
}