1:
2:
3:
4:
5:
6: #include <string.h>
7: #include "config.h"
8:
9: #include <anthy/xstr.h>
10: #include <anthy/xchar.h>
11:
12: #include "diclib_inner.h"
13:
14: #define PAGE_SIZE 128
15: #define NR_PAGES 512
16: #include "e2u.h"
17: #include "u2e.h"
18:
19:
20: static struct xchar_ent {
21: const xchar xc;
22: const int type;
23: struct xchar_ent *next;
24: } xchar_tab[] =
25: {
26: {0x309b, XCT_CLOSE, 0},
27: {0xff08, XCT_OPEN, 0},
28: {0xff09, XCT_CLOSE, 0},
29: {0x3014, XCT_OPEN, 0},
30: {0x3015, XCT_CLOSE, 0},
31: {0xff3b, XCT_OPEN, 0},
32: {0xff3d, XCT_CLOSE, 0},
33: {0xff5b, XCT_OPEN, 0},
34: {0xff5d, XCT_CLOSE, 0},
35: {0x3008, XCT_OPEN, 0},
36: {0x3009, XCT_CLOSE, 0},
37: {0x300a, XCT_OPEN, 0},
38: {0x300b, XCT_CLOSE, 0},
39: {0x300c, XCT_OPEN, 0},
40: {0x300d, XCT_CLOSE, 0},
41: {0x300e, XCT_OPEN, 0},
42: {0x300f, XCT_CLOSE, 0},
43: {0x3010, XCT_OPEN, 0},
44: {0x3011, XCT_CLOSE, 0},
45: {0x3001, XCT_PUNCTUATION, 0},
46: {0x3002, XCT_PUNCTUATION, 0},
47: {0xff0c, XCT_PUNCTUATION, 0},
48: {0xff0e, XCT_PUNCTUATION, 0},
49: {0xff1f, XCT_PUNCTUATION, 0},
50: {0xff01, XCT_PUNCTUATION, 0},
51:
52: {28, XCT_OPEN, 0},
53: {133, XCT_OPEN, 0},
54: {29, XCT_CLOSE, 0},
55: {135, XCT_CLOSE, 0},
56: {HK_TO, XCT_DEP, 0},
57: {HK_HA, XCT_DEP, 0},
58: {HK_NO, XCT_DEP, 0},
59: {HK_NI, XCT_DEP, 0},
60: {HK_GA, XCT_DEP, 0},
61: {HK_WO, XCT_DEP, 0},
62: {WIDE_0, XCT_WIDENUM, 0},
63: {WIDE_1, XCT_WIDENUM, 0},
64: {WIDE_2, XCT_WIDENUM, 0},
65: {WIDE_3, XCT_WIDENUM, 0},
66: {WIDE_4, XCT_WIDENUM, 0},
67: {WIDE_5, XCT_WIDENUM, 0},
68: {WIDE_6, XCT_WIDENUM, 0},
69: {WIDE_7, XCT_WIDENUM, 0},
70: {WIDE_8, XCT_WIDENUM, 0},
71: {WIDE_9, XCT_WIDENUM, 0},
72: {HK_DDOT, XCT_PART, 0},
73: {HK_XA, XCT_PART, 0},
74: {HK_XI, XCT_PART, 0},
75: {HK_XU, XCT_PART, 0},
76: {HK_XE, XCT_PART, 0},
77: {HK_XO, XCT_PART, 0},
78: {HK_XYA, XCT_PART, 0},
79: {HK_XYU, XCT_PART, 0},
80: {HK_XYO, XCT_PART, 0},
81: {HK_TT, XCT_PART, 0},
82: {0, 0, 0},
83: };
84:
85: #define DDOT 0x8ede
86: #define CIRCLE 0x8edf
87:
88: static const struct half_kana_table half_kana_tab[] = {
89: {HK_A,0x8eb1,0},
90: {HK_I,0x8eb2,0},
91: {HK_U,0x8eb3,0},
92: {HK_E,0x8eb4,0},
93: {HK_O,0x8eb5,0},
94: {HK_KA,0x8eb6,0},
95: {HK_KI,0x8eb7,0},
96: {HK_KU,0x8eb8,0},
97: {HK_KE,0x8eb9,0},
98: {HK_KO,0x8eba,0},
99: {HK_SA,0x8ebb,0},
100: {HK_SI,0x8ebc,0},
101: {HK_SU,0x8ebd,0},
102: {HK_SE,0x8ebe,0},
103: {HK_SO,0x8ebf,0},
104: {HK_TA,0x8ec0,0},
105: {HK_TI,0x8ec1,0},
106: {HK_TU,0x8ec2,0},
107: {HK_TE,0x8ec3,0},
108: {HK_TO,0x8ec4,0},
109: {HK_NA,0x8ec5,0},
110: {HK_NI,0x8ec6,0},
111: {HK_NU,0x8ec7,0},
112: {HK_NE,0x8ec8,0},
113: {HK_NO,0x8ec9,0},
114: {HK_HA,0x8eca,0},
115: {HK_HI,0x8ecb,0},
116: {HK_HU,0x8ecc,0},
117: {HK_HE,0x8ecd,0},
118: {HK_HO,0x8ece,0},
119: {HK_MA,0x8ecf,0},
120: {HK_MI,0x8ed0,0},
121: {HK_MU,0x8ed1,0},
122: {HK_ME,0x8ed2,0},
123: {HK_MO,0x8ed3,0},
124: {HK_YA,0x8ed4,0},
125: {HK_YU,0x8ed5,0},
126: {HK_YO,0x8ed6,0},
127: {HK_RA,0x8ed7,0},
128: {HK_RI,0x8ed8,0},
129: {HK_RU,0x8ed9,0},
130: {HK_RE,0x8eda,0},
131: {HK_RO,0x8edb,0},
132: {HK_WA,0x8edc,0},
133: {HK_WI,0,0},
134: {HK_WE,0,0},
135: {HK_WO,0x8ea6,0},
136: {HK_N,0x8edd,0},
137: {HK_TT,0x8eaf,0},
138: {HK_XA,0x8ea7,0},
139: {HK_XI,0x8ea8,0},
140: {HK_XU,0x8ea9,0},
141: {HK_XE,0x8eaa,0},
142: {HK_XO,0x8eab,0},
143: {HK_GA,0x8eb6,DDOT},
144: {HK_GI,0x8eb7,DDOT},
145: {HK_GU,0x8eb8,DDOT},
146: {HK_GE,0x8eb9,DDOT},
147: {HK_GO,0x8eba,DDOT},
148: {HK_ZA,0x8ebb,DDOT},
149: {HK_ZI,0x8ebc,DDOT},
150: {HK_ZU,0x8ebd,DDOT},
151: {HK_ZE,0x8ebe,DDOT},
152: {HK_ZO,0x8ebf,DDOT},
153: {HK_DA,0x8ec0,DDOT},
154: {HK_DI,0x8ec1,DDOT},
155: {HK_DU,0x8ec2,DDOT},
156: {HK_DE,0x8ec3,DDOT},
157: {HK_DO,0x8ec4,DDOT},
158: {HK_BA,0x8eca,DDOT},
159: {HK_BI,0x8ecb,DDOT},
160: {HK_BU,0x8ecc,DDOT},
161: {HK_BE,0x8ecd,DDOT},
162: {HK_BO,0x8ece,DDOT},
163: {HK_PA,0x8eca,CIRCLE},
164: {HK_PI,0x8ecb,CIRCLE},
165: {HK_PU,0x8ecc,CIRCLE},
166: {HK_PE,0x8ecd,CIRCLE},
167: {HK_PO,0x8ece,CIRCLE},
168: {HK_XYA,0x8eac,0},
169: {HK_XYU,0x8ead,0},
170: {HK_XYO,0x8eae,0},
171: {HK_XWA,0,0},
172: {HK_DDOT,DDOT,0},
173: {HK_BAR,0x8eb0,0},
174: {0,0,0}
175: };
176:
177: static const struct half_wide_ent {
178: const xchar half;
179: const xchar wide;
180: } half_wide_tab[] = {
181: {'!', 0xff01},
182: {'\"', 0x201d},
183: {'#', 0xff03},
184: {'$', 0xff04},
185: {'%', 0xff05},
186: {'&', 0xff06},
187: {'\'', 0x2019},
188: {'(', 0xff08},
189: {')', 0xff09},
190: {'*', 0xff0a},
191: {'+', 0xff0b},
192: {',', 0xff0c},
193: {'-', 0xff0d},
194: {'.', 0xff0e},
195: {'/', 0xff0f},
196: {':', 0xff1a},
197: {';', 0xff1b},
198: {'<', 0xff1c},
199: {'=', 0xff1d},
200: {'>', 0xff1e},
201: {'?', 0xff1f},
202: {'@', 0xff20},
203: {'[', 0xff3b},
204: {'\\', 0xff3c},
205: {']', 0xff3d},
206: {'^', 0xff3e},
207: {'_', 0xff3f},
208: {'`', 0xff40},
209: {'{', 0xff5b},
210: {'|', 0xff5c},
211: {'}', 0xff5d},
212: {'~', 0xff5e},
213: {0, 0}
214: };
215:
216: xchar
217: anthy_lookup_half_wide(xchar xc)
218: {
219: const struct half_wide_ent *hw;
220: for (hw = half_wide_tab; hw->half; hw ++) {
221: if (hw->half == xc) {
222: return hw->wide;
223: }
224: if (hw->wide == xc) {
225: return hw->half;
226: }
227: }
228: return 0;
229: }
230:
231: const struct half_kana_table *
232: anthy_find_half_kana(xchar xc)
233: {
234: const struct half_kana_table *tab;
235: for (tab = half_kana_tab; tab->src; tab ++) {
236: if (tab->src == xc && tab->dst) {
237: return tab;
238: }
239: }
240: return NULL;
241: }
242:
243: static int
244: find_xchar_type(xchar xc)
245: {
246: struct xchar_ent *xe = xchar_tab;
247:
248: for (; xe->xc; xe++) {
249: if (xe->xc == xc) {
250: return xe->type;
251: }
252: }
253:
254: return XCT_NONE;
255: }
256:
257: static int
258: is_hira(xchar xc)
259: {
260: if (xc == HK_DDOT) {
261: return 1;
262: }
263: if (xc == HK_BAR) {
264: return 1;
265: }
266: xc = anthy_ucs_to_euc(xc);
267: if ((xc & 0xff00) == 0xa400) {
268: return 1;
269: }
270: return 0;
271: }
272:
273: static int
274: is_kata(xchar xc)
275: {
276: if (xc == HK_BAR) {
277: return 1;
278: }
279: xc = anthy_ucs_to_euc(xc);
280: if ((xc & 0xff00) == 0xa500) {
281: return 1;
282: }
283: return 0;
284: }
285:
286: static int
287: is_symbol(xchar xc)
288: {
289: if (xc == UCS_GETA) {
290: return 1;
291: }
292: xc = anthy_ucs_to_euc(xc);
293: if (xc == EUC_GETA) {
294: return 0;
295: }
296: if ((xc & 0xff00) == 0xa100) {
297: return 1;
298: }
299: if ((xc & 0xff00) == 0xa200) {
300: return 1;
301: }
302: return 0;
303: }
304:
305: static int
306: is_kanji(xchar xc)
307: {
308: if (xc > 0x4e00 && xc < 0xa000) {
309: return 1;
310: }
311: return 0;
312: }
313:
314: static int
315: search(const int *tab[], int v, int geta)
316: {
317: int page = v / PAGE_SIZE;
318: int off = v % PAGE_SIZE;
319: const int *t;
320: if (page >= NR_PAGES) {
321: return geta;
322: }
323: t = tab[page];
324: if (!t) {
325: return geta;
326: }
327: if (!t[off] && v) {
328: return geta;
329: }
330: return t[off];
331: }
332:
333: int
334: anthy_euc_to_ucs(int ec)
335: {
336: return search(e2u_index, ec, UCS_GETA);
337: }
338:
339: int
340: anthy_ucs_to_euc(int uc)
341: {
342: int r = search(u2e_index, uc, EUC_GETA);
343: if (r > 65536) {
344: return EUC_GETA;
345: }
346: return r;
347: }
348:
349: int
350: anthy_get_xchar_type(const xchar xc)
351: {
352: int t = find_xchar_type(xc);
353: if (xc > 47 && xc < 58) {
354: t |= XCT_NUM;
355: }
356: if (xc < 128) {
357: t |= XCT_ASCII;
358: }
359: if (is_hira(xc)) {
360: t |= XCT_HIRA;
361: }
362: if (is_kata(xc)) {
363: t |= XCT_KATA;
364: }
365: if (is_symbol(xc)) {
366: if (!(t & XCT_OPEN) && !(t & XCT_CLOSE)) {
367: t |= XCT_SYMBOL;
368: }
369: }
370: if (is_kanji(xc)) {
371: t |= XCT_KANJI;
372: }
373: return t;
374: }
375:
376: int
377: anthy_get_xstr_type(const xstr *xs)
378: {
379: int i, t = XCT_ALL;
380: for (i = 0; i < xs->len; i++) {
381: t &= anthy_get_xchar_type(xs->str[i]);
382: }
383: return t;
384: }
385:
386: int
387: anthy_xchar_to_num(xchar xc)
388: {
389: switch (xc) {
390: case WIDE_0:return 0;
391: case WIDE_1:return 1;
392: case WIDE_2:return 2;
393: case WIDE_3:return 3;
394: case WIDE_4:return 4;
395: case WIDE_5:return 5;
396: case WIDE_6:return 6;
397: case WIDE_7:return 7;
398: case WIDE_8:return 8;
399: case WIDE_9:return 9;
400: }
401: if (xc >= '0' && xc <= '9') {
402: return xc - (int)'0';
403: }
404: return -1;
405: }
406:
407: xchar
408: anthy_xchar_wide_num_to_num(xchar c)
409: {
410: switch (c) {
411: case WIDE_0:return '0';
412: case WIDE_1:return '1';
413: case WIDE_2:return '2';
414: case WIDE_3:return '3';
415: case WIDE_4:return '4';
416: case WIDE_5:return '5';
417: case WIDE_6:return '6';
418: case WIDE_7:return '7';
419: case WIDE_8:return '8';
420: case WIDE_9:return '9';
421: default:return c;
422: }
423: }
424:
/*
 * Initialisation hook for this module.
 *
 * Currently does nothing: the tables visible in this file are all statically
 * initialised.  Presumably kept so that existing callers keep linking.
 */
void
anthy_init_xchar_tab(void)
{
  /* nothing to do */
}