1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23: #include <stdio.h>
24: #include <stdlib.h>
25: #include <string.h>
26: #include <arpa/inet.h>
27: #include <anthy/segclass.h>
28: #include <anthy/feature_set.h>
29:
30: #include <anthy/splitter.h>
31:
32:
33:
34:
35:
36:
37:
38:
39:
/*
 * Feature numbers stored in a feature_list by the helpers in this file.
 *
 * The *_BASE values are offsets: the helper adds a caller-supplied value
 * to the base.  The FEATURE_* values are stored as-is.
 */

/* offsets */
#define CUR_CLASS_BASE         0    /* + cl, see ..._set_cur_class()            */
#define DEP_TYPE_FEATURE_BASE  20   /* + c,  see ..._set_dep_class()            */
#define CLASS_TRANS_BASE       30   /* + pc * SEG_SIZE + cc, ..._set_class_trans() */

/* single features selected by MW_FEATURE_* bits, see ..._set_mw_features() */
#define FEATURE_SV             542
#define FEATURE_WEAK           543
#define FEATURE_SUFFIX         544
#define FEATURE_NUM            546
#define FEATURE_CORE1          547
#define FEATURE_HIGH_FREQ      548
#define FEATURE_WEAK_SEQ       549

/* offsets */
#define COS_BASE               573  /* + cos of a noun, see ..._set_noun_cos()  */
#define DEP_FEATURE_BASE       580  /* + h,  see ..._set_dep_word()             */
52:
53: void
54: anthy_feature_list_init(struct feature_list *fl)
55: {
56: fl->nr = 0;
57: fl->size = NR_EM_FEATURES;
58: }
59:
/*
 * Counterpart of anthy_feature_list_init().  Nothing is released here;
 * the argument is only referenced so that it is not reported as unused.
 */
void
anthy_feature_list_free(struct feature_list *fl)
{
  (void)fl;
}
65:
66: void
67: anthy_feature_list_add(struct feature_list *fl, int f)
68: {
69: if (fl->nr < NR_EM_FEATURES) {
70: fl->u.index[fl->nr] = f;
71: fl->nr++;
72: }
73: }
74:
75: int
76: anthy_feature_list_nr(const struct feature_list *fl)
77: {
78: return fl->nr;
79: }
80:
81: int
82: anthy_feature_list_nth(const struct feature_list *fl, int nth)
83: {
84: return fl->u.index[nth];
85: }
86:
/*
 * qsort() comparator for arrays of short, ascending order.
 *
 * p1, p2: pointers to the two short elements being compared.
 * Returns a negative value, zero or a positive value when *p1 is less
 * than, equal to or greater than *p2.
 *
 * The elements are read through const-qualified pointers (the comparator
 * must not modify them) and compared explicitly instead of subtracted, so
 * the result is correct regardless of the relative widths of short and int.
 */
static int
cmp_short(const void *p1, const void *p2)
{
  const short lhs = *(const short *)p1;
  const short rhs = *(const short *)p2;

  return (lhs > rhs) - (lhs < rhs);
}
92:
93: void
94: anthy_feature_list_sort(struct feature_list *fl)
95: {
96: qsort(fl->u.index, fl->nr, sizeof(fl->u.index[0]),
97: cmp_short);
98: }
99:
100:
101: void
102: anthy_feature_list_set_cur_class(struct feature_list *fl, int cl)
103: {
104: anthy_feature_list_add(fl, CUR_CLASS_BASE + cl);
105: }
106:
107: void
108: anthy_feature_list_set_class_trans(struct feature_list *fl, int pc, int cc)
109: {
110: anthy_feature_list_add(fl, CLASS_TRANS_BASE + pc * SEG_SIZE + cc);
111: }
112:
113: void
114: anthy_feature_list_set_dep_word(struct feature_list *fl, int h)
115: {
116: anthy_feature_list_add(fl, h + DEP_FEATURE_BASE);
117: }
118:
119: void
120: anthy_feature_list_set_dep_class(struct feature_list *fl, int c)
121: {
122: anthy_feature_list_add(fl, c + DEP_TYPE_FEATURE_BASE);
123: }
124:
125: void
126: anthy_feature_list_set_noun_cos(struct feature_list *fl, wtype_t wt)
127: {
128: int c;
129: if (anthy_wtype_get_pos(wt) != POS_NOUN) {
130: return ;
131: }
132: c = anthy_wtype_get_cos(wt);
133: if (c == COS_SUFFIX) {
134: anthy_feature_list_add(fl, COS_BASE + c);
135: }
136: }
137:
138: void
139: anthy_feature_list_set_mw_features(struct feature_list *fl, int mask)
140: {
141: if (mask & MW_FEATURE_WEAK_CONN) {
142: anthy_feature_list_add(fl, FEATURE_WEAK);
143: }
144: if (mask & MW_FEATURE_SUFFIX) {
145: anthy_feature_list_add(fl, FEATURE_SUFFIX);
146: }
147: if (mask & MW_FEATURE_SV) {
148: anthy_feature_list_add(fl, FEATURE_SV);
149: }
150: if (mask & MW_FEATURE_NUM) {
151: anthy_feature_list_add(fl, FEATURE_NUM);
152: }
153: if (mask & MW_FEATURE_CORE1) {
154: anthy_feature_list_add(fl, FEATURE_CORE1);
155: }
156: if (mask & MW_FEATURE_HIGH_FREQ) {
157: anthy_feature_list_add(fl, FEATURE_HIGH_FREQ);
158: }
159: if (mask & MW_FEATURE_WEAK_SEQ) {
160: anthy_feature_list_add(fl, FEATURE_WEAK_SEQ);
161: }
162: }
163:
164: void
165: anthy_feature_list_print(struct feature_list *fl)
166: {
167: int i;
168: printf("features=");
169: for (i = 0; i < fl->nr; i++) {
170: if (i) {
171: printf(",");
172: }
173: printf("%d", fl->u.index[i]);
174: }
175: printf("\n");
176: }
177:
178: static int
179: compare_line(const void *kp, const void *cp)
180: {
181: const int *f = kp;
182: const struct feature_freq *c = cp;
183: int i;
184: for (i = 0; i < NR_EM_FEATURES; i++) {
185: if (f[i] != (int)ntohl(c->f[i])) {
186: return f[i] - ntohl(c->f[i]);
187: }
188: }
189: return 0;
190: }
191:
192: struct feature_freq *
193: anthy_find_array_freq(const void *image, int *f, int nr,
194: struct feature_freq *arg)
195: {
196: struct feature_freq *res;
197: int nr_lines, i;
198: const int *array = (int *)image;
199: int n[NR_EM_FEATURES];
200: if (!image) {
201: return NULL;
202: }
203:
204: for (i = 0; i < NR_EM_FEATURES; i++) {
205: if (i < nr) {
206: n[i] = f[i];
207: } else {
208: n[i] = 0;
209: }
210: }
211:
212: nr_lines = ntohl(array[1]);
213: res = bsearch(n, &array[16], nr_lines,
214: sizeof(struct feature_freq),
215: compare_line);
216: if (!res) {
217: return NULL;
218: }
219: for (i = 0; i < NR_EM_FEATURES + 2; i++) {
220: arg->f[i] = ntohl(res->f[i]);
221: }
222: return arg;
223: }
224:
225: struct feature_freq *
226: anthy_find_feature_freq(const void *image,
227: const struct feature_list *fl,
228: struct feature_freq *arg)
229: {
230: int i, nr;
231: int f[NR_EM_FEATURES + 2];
232:
233:
234: nr = anthy_feature_list_nr(fl);
235: for (i = 0; i < NR_EM_FEATURES + 2; i++) {
236: if (i < nr) {
237: f[i] = anthy_feature_list_nth(fl, i);
238: } else {
239: f[i] = 0;
240: }
241: }
242: return anthy_find_array_freq(image, f, NR_EM_FEATURES, arg);
243: }
244:
/* Initialisation hook for this module; there is nothing to set up. */
void
anthy_init_features(void)
{
  /* intentionally empty */
}