1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11: #include <stdlib.h>
12:
13: #include <anthy/segment.h>
14: #include <anthy/record.h>
15: #include "sorter.h"
16:
/*
 * Upper bound on the number of values kept in one CAND_HISTORY row
 * (learn_cand_history clamps the value count to this); it also scales
 * the boosts computed by get_history_weight and reorder_by_candidate.
 */
#define HISTORY_DEPTH 8
/* Limit passed to anthy_truncate_section() for each history section. */
#define MAX_HISTORY_ENTRY 200
19:
20:
21: static void
22: learn_cand_history(struct seg_ent *seg)
23: {
24: int nr, i;
25:
26: if (anthy_select_section("CAND_HISTORY", 1)) {
27: return ;
28: }
29: if (anthy_select_row(&seg->str, 1)) {
30: return ;
31: }
32:
33: nr = anthy_get_nr_values();
34: nr ++;
35: if (nr > HISTORY_DEPTH) {
36: nr = HISTORY_DEPTH;
37: }
38: for (i = nr - 1; i > 0; i--) {
39: xstr *xs = anthy_get_nth_xstr(i - 1);
40: anthy_set_nth_xstr(i, xs);
41: }
42:
43: anthy_set_nth_xstr(0, &seg->cands[seg->committed]->str);
44: anthy_mark_row_used();
45: }
46:
47: static void
48: learn_suffix_history(struct seg_ent *seg)
49: {
50: int i;
51: struct cand_ent *cand = seg->cands[seg->committed];
52: if (anthy_select_section("SUFFIX_HISTORY", 1)) {
53: return ;
54: }
55: for (i = 0; i < cand->nr_words; i++) {
56: struct cand_elm *elm = &cand->elm[i];
57: xstr xs;
58: if (elm->nth == -1) {
59: continue;
60: }
61: if (anthy_wtype_get_pos(elm->wt) != POS_SUC) {
62: continue;
63: }
64: if (anthy_select_row(&elm->str, 1)) {
65: continue;
66: }
67: if (anthy_get_nth_dic_ent_str(elm->se, &elm->str, elm->nth, &xs)) {
68: continue;
69: }
70: anthy_set_nth_xstr(0, &xs);
71: free(xs.str);
72: }
73: }
74:
75:
76:
77: void
78: anthy_learn_cand_history(struct segment_list *sl)
79: {
80: int i, nr = 0;
81: for (i = 0; i < sl->nr_segments; i++) {
82: struct seg_ent *seg = anthy_get_nth_segment(sl, i);
83: xstr *xs = &seg->str;
84: if (seg->committed < 0) {
85: continue;
86: }
87: if (anthy_select_row(xs, 0)) {
88: if (seg->committed == 0) {
89:
90: continue;
91: }
92: }
93:
94: learn_cand_history(seg);
95: learn_suffix_history(seg);
96: nr ++;
97: }
98: if (nr > 0) {
99: if (!anthy_select_section("CAND_HISTORY", 1)) {
100: anthy_truncate_section(MAX_HISTORY_ENTRY);
101: }
102: if (!anthy_select_section("SUFFIX_HISTORY", 1)) {
103: anthy_truncate_section(MAX_HISTORY_ENTRY);
104: }
105: }
106: }
107:
108:
109: static int
110: get_history_weight(xstr *xs)
111: {
112: int i, nr = anthy_get_nr_values();
113: int w = 0;
114: for (i = 0; i < nr; i++) {
115: xstr *h = anthy_get_nth_xstr(i);
116: if (!h) {
117: continue;
118: }
119: if (!anthy_xstrcmp(xs, h)) {
120: w++;
121: if (i == 0) {
122:
123: w += (HISTORY_DEPTH / 2);
124: }
125: }
126: }
127: return w;
128: }
129:
130: static void
131: reorder_by_candidate(struct seg_ent *se)
132: {
133: int i, primary_score;
134:
135: if (anthy_select_section("CAND_HISTORY", 1)) {
136: return ;
137: }
138: if (anthy_select_row(&se->str, 0)) {
139: return ;
140: }
141:
142: primary_score = se->cands[0]->score;
143:
144: for (i = 0; i < se->nr_cands; i++) {
145: struct cand_ent *ce = se->cands[i];
146: int weight = get_history_weight(&ce->str);
147: ce->score += primary_score / (HISTORY_DEPTH /2) * weight;
148: }
149: anthy_mark_row_used();
150: }
151:
152:
153: static void
154: reorder_by_suffix(struct seg_ent *se)
155: {
156: int i, j;
157: int delta = 0;
158: int top_cand = -1;
159: if (anthy_select_section("SUFFIX_HISTORY", 0)) {
160: return ;
161: }
162:
163: for (i = 0; i < se->nr_cands; i++) {
164: struct cand_ent *ce = se->cands[i];
165:
166: for (j = 0; j < ce->nr_words; j++) {
167: struct cand_elm *elm = &ce->elm[j];
168: xstr xs;
169: if (elm->nth == -1) {
170: continue;
171: }
172: if (anthy_wtype_get_pos(elm->wt) != POS_SUC) {
173: continue;
174: }
175:
176: if (anthy_select_row(&elm->str, 0)) {
177: continue;
178: }
179:
180: if (anthy_get_nth_dic_ent_str(elm->se, &elm->str, elm->nth, &xs)) {
181: continue;
182: }
183:
184: if (anthy_xstrcmp(&xs, anthy_get_nth_xstr(0))) {
185: free(xs.str);
186: continue;
187: }
188:
189: if (top_cand < 0) {
190: top_cand = i;
191: }
192: if (delta == 0) {
193: delta = (se->cands[top_cand]->score - ce->score) + 1;
194: }
195: ce->score += delta;
196: free(xs.str);
197: }
198: }
199: }
200:
201:
/*
 * Adjust the candidate scores of `se` using learned history: first the
 * per-candidate history boost, then the suffix-history boost.
 */
void
anthy_reorder_candidates_by_history(struct seg_ent *se)
{
  /* The candidate-history pass runs first; the suffix pass sees its scores. */
  reorder_by_candidate(se);
  reorder_by_suffix(se);
}