1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28: #include <limits.h>
29: #include <stdlib.h>
30: #include <stdio.h>
31:
32: #include <anthy/segment.h>
33: #include <anthy/splitter.h>
34: #include <anthy/ordering.h>
35: #include "sorter.h"
36:
37:
/*
 * Base scores handed out by eval_candidate().  OCHAIRE_SCORE is not
 * defined in this file; it has to come from one of the included headers.
 */

/* Assigned to candidates flagged CEF_OCHAIRE or CEF_BEST. */
#define OCHAIRE_BASE       OCHAIRE_SCORE
/* Assigned to candidates flagged CEF_COMPOUND. */
#define COMPOUND_BASE      (OCHAIRE_SCORE / 2)
/* Not referenced by the code in this file section. */
#define DEPWORD_BASE       (OCHAIRE_SCORE / 2)

/*
 * Assigned to hiragana/katakana/guess candidates when the segment was
 * judged uncertain by uncertain_segment_p().
 */
#define NOCONV_WITH_BIAS   900000

/* Not referenced by the code in this file section. */
#define NORMAL_BASE        100
/* Assigned to candidates flagged CEF_SINGLEWORD. */
#define SINGLEWORD_BASE    10
/* Assigned to candidates flagged CEF_COMPOUND_PART. */
#define COMPOUND_PART_BASE 2
/*
 * Assigned to hiragana/katakana/guess candidates of a segment that is not
 * uncertain, and to katakana candidates demoted by trim_kana_candidate().
 */
#define NOCONV_BASE        1
53:
54:
55: static int
56: uncertain_segment_p(struct seg_ent *se)
57: {
58: struct meta_word *mw;
59: if (se->nr_metaword == 0) {
60: return 0;
61: }
62:
63: mw = se->mw_array[0];
64:
65:
66: if (se->len * 3 >= mw->len * 5) {
67: return 1;
68: }
69: return 0;
70: }
71:
72: static void
73: release_redundant_candidate(struct seg_ent *se)
74: {
75: int i, j;
76:
77: for (i = 0; i < se->nr_cands && se->cands[i]->score; i++);
78:
79: if (i < se->nr_cands) {
80: for (j = i; j < se->nr_cands; j++) {
81: anthy_release_cand_ent(se->cands[j]);
82: }
83: se->nr_cands = i;
84: }
85: }
86:
87:
88: static int
89: candidate_compare_func(const void *p1, const void *p2)
90: {
91: const struct cand_ent *const *c1 = p1, *const *c2 = p2;
92: return (*c2)->score - (*c1)->score;
93: }
94:
95: static void
96: sort_segment(struct seg_ent *se)
97: {
98: qsort(se->cands, se->nr_cands,
99: sizeof(struct cand_ent *),
100: candidate_compare_func);
101: }
102:
103: static void
104: trim_kana_candidate(struct seg_ent *se)
105: {
106: int i;
107: if (se->cands[0]->flag & CEF_KATAKANA) {
108: return ;
109: }
110: for (i = 1; i < se->nr_cands; i++) {
111: if (se->cands[i]->flag & CEF_KATAKANA) {
112:
113: se->cands[i]->score = NOCONV_BASE;
114: }
115: }
116: }
117:
118: static void
119: check_dupl_candidate(struct seg_ent *se)
120: {
121: int i,j;
122: for (i = 0; i < se->nr_cands - 1; i++) {
123: for (j = i + 1; j < se->nr_cands; j++) {
124: if (!anthy_xstrcmp(&se->cands[i]->str, &se->cands[j]->str)) {
125:
126: se->cands[j]->score = 0;
127: se->cands[i]->flag |= se->cands[j]->flag;
128: }
129: }
130: }
131: }
132:
133:
134: static void
135: eval_candidate_by_metaword(struct cand_ent *ce)
136: {
137: int i;
138: int score = 1;
139:
140:
141: for (i = 0; i < ce->nr_words; i++) {
142: struct cand_elm *elm = &ce->elm[i];
143: int pos, div = 1;
144: int freq;
145:
146: if (elm->nth < 0) {
147:
148: continue;
149: }
150: pos = anthy_wtype_get_pos(elm->wt);
151: if (pos == POS_PRE || pos == POS_SUC) {
152: div = 4;
153: }
154:
155: freq = anthy_get_nth_dic_ent_freq(elm->se, elm->nth);
156: score += freq / div;
157: }
158:
159: if (ce->mw) {
160: score *= ce->mw->struct_score;
161: score /= RATIO_BASE;
162: }
163: ce->score = score;
164: }
165:
166:
167: static void
168: eval_candidate(struct cand_ent *ce, int uncertain)
169: {
170: if ((ce->flag &
171: (CEF_OCHAIRE | CEF_SINGLEWORD | CEF_HIRAGANA |
172: CEF_KATAKANA | CEF_GUESS | CEF_COMPOUND | CEF_COMPOUND_PART |
173: CEF_BEST)) == 0) {
174:
175: eval_candidate_by_metaword(ce);
176: } else if (ce->flag & CEF_OCHAIRE) {
177: ce->score = OCHAIRE_BASE;
178: } else if (ce->flag & CEF_SINGLEWORD) {
179: ce->score = SINGLEWORD_BASE;
180: } else if (ce->flag & CEF_COMPOUND) {
181: ce->score = COMPOUND_BASE;
182: } else if (ce->flag & CEF_COMPOUND_PART) {
183: ce->score = COMPOUND_PART_BASE;
184: } else if (ce->flag & CEF_BEST) {
185: ce->score = OCHAIRE_BASE;
186: } else if (ce->flag & (CEF_HIRAGANA | CEF_KATAKANA |
187: CEF_GUESS)) {
188: if (uncertain) {
189:
190:
191:
192:
193: ce->score = NOCONV_WITH_BIAS;
194: if (CEF_KATAKANA & ce->flag) {
195: ce->score ++;
196: }
197: if (CEF_GUESS & ce->flag) {
198: ce->score += 2;
199: }
200: } else {
201: ce->score = NOCONV_BASE;
202: }
203: }
204: ce->score += 1;
205: }
206:
207: static void
208: eval_segment(struct seg_ent *se)
209: {
210: int i;
211: int uncertain = uncertain_segment_p(se);
212: for (i = 0; i < se->nr_cands; i++) {
213: eval_candidate(se->cands[i], uncertain);
214: }
215: }
216:
217:
218: static void
219: apply_learning(struct segment_list *sl, int nth)
220: {
221: int i;
222:
223:
224:
225:
226:
227:
228: anthy_reorder_candidates_by_relation(sl, nth);
229:
230: for (i = nth; i < sl->nr_segments; i++) {
231: struct seg_ent *seg = anthy_get_nth_segment(sl, i);
232:
233: anthy_proc_swap_candidate(seg);
234:
235: anthy_reorder_candidates_by_history(anthy_get_nth_segment(sl, i));
236: }
237: }
238:
239:
240:
241:
242: void
243: anthy_sort_candidate(struct segment_list *sl, int nth)
244: {
245: int i;
246: for (i = nth; i < sl->nr_segments; i++) {
247: struct seg_ent *seg = anthy_get_nth_segment(sl, i);
248:
249: eval_segment(seg);
250:
251: sort_segment(seg);
252:
253: check_dupl_candidate(seg);
254:
255: sort_segment(seg);
256:
257: release_redundant_candidate(seg);
258: }
259:
260:
261: apply_learning(sl, nth);
262:
263:
264: for ( i = nth ; i < sl->nr_segments ; i++){
265: sort_segment(anthy_get_nth_segment(sl, i));
266: }
267:
268: for (i = nth; i < sl->nr_segments; i++) {
269: trim_kana_candidate(anthy_get_nth_segment(sl, i));
270: }
271:
272: for ( i = nth ; i < sl->nr_segments ; i++){
273: sort_segment(anthy_get_nth_segment(sl, i));
274: }
275: }