1:
2: #ifndef _splitter_h_included_
3: #define _splitter_h_included_
4:
5: #include <anthy/dic.h>
6: #include <anthy/xstr.h>
7: #include <anthy/wtype.h>
8: #include <anthy/segclass.h>
9:
10:
/*
 * Numeric constants exported to users of the splitter.
 * NOTE(review): the code that consumes them is not visible in this header;
 * the per-macro notes below are inferred from the names only.
 */
/* Presumably the denominator for fixed-point ratios -- TODO confirm. */
#define RATIO_BASE 256
/* Presumably the score given to MW_OCHAIRE metawords -- TODO confirm. */
#define OCHAIRE_SCORE 5000000
13:
14:
15:
16:
/*
 * State object taken as a parameter by every splitter function declared
 * in this header that operates on a context (anthy_init_split_context(),
 * anthy_mark_border(), anthy_release_split_context(), ...).
 */
struct splitter_context {

    /* Only the struct tag is visible here; the layout is defined elsewhere. */
    struct word_split_info_cache *word_split_info;
    /* NOTE(review): presumably the number of characters being split, and
     * therefore the number of usable entries behind ce -- confirm. */
    int char_count;
    /* NOTE(review): presumably the is_reverse argument given to
     * anthy_init_split_context() -- confirm. */
    int is_reverse;
    /* Per-character record; ce presumably points at an array of these. */
    struct char_ent {
        /* Pointer to an xchar (presumably this entry's character). */
        xchar *c;
        /* NOTE(review): presumably non-zero when a segment border lies at
         * this position -- confirm against anthy_mark_border(). */
        int seg_border;
        /* NOTE(review): meaning not derivable from this header; by name,
         * a segment length recorded for this position -- confirm. */
        int initial_seg_len;

        /* By name, the preferred segment class chosen for this position. */
        enum seg_class best_seg_class;
        /* By name, the preferred metaword chosen for this position. */
        struct meta_word* best_mw;
    }*ce;
};
31:
32:
/*
 * Three-valued status; it is the type of meta_word.can_use.
 * NOTE(review): the per-value notes are inferred from the names only.
 */
enum constraint_stat {
    unchecked = 0,  /* presumably: no verdict recorded yet */
    ok        = 1,  /* presumably: check passed */
    ng        = 2   /* presumably: check failed ("no good") */
};
36:
37:
/*
 * Kind tag for a metaword; it is the type of meta_word.type.
 * Enumerators are numbered consecutively from 0, so MW_END equals the
 * number of enumerators that precede it.
 * NOTE(review): the per-value notes are inferred from the names only;
 * confirm against the code that creates each kind.
 */
enum metaword_type {
    MW_DUMMY = 0,       /* placeholder kind */
    MW_SINGLE,          /* presumably built from a single word list */
    MW_WRAP,            /* presumably wraps another metaword */
    MW_COMPOUND_HEAD,   /* compound-word related: head */
    MW_COMPOUND,        /* compound-word related */
    MW_COMPOUND_LEAF,   /* compound-word related: leaf */
    MW_COMPOUND_PART,   /* compound-word related: part */
    MW_V_RENYOU_A,      /* presumably verb (renyou form) + adjective */
    MW_V_RENYOU_NOUN,   /* presumably verb (renyou form) + noun */
    MW_NUMBER,          /* presumably a numeric expression */
    MW_OCHAIRE,         /* see also OCHAIRE_SCORE */
    MW_END              /* last enumerator; presumably a sentinel/count */
};
64:
/*
 * Feature bit masks. Every non-zero value is a distinct power of two, so
 * the masks can be OR-ed together; bit 0x008 is not assigned in this
 * header. Presumably these are stored in meta_word.mw_features.
 */
#define MW_FEATURE_NONE      0x000
#define MW_FEATURE_SV        0x001
#define MW_FEATURE_WEAK_CONN 0x002
#define MW_FEATURE_SUFFIX    0x004
#define MW_FEATURE_NUM       0x010
#define MW_FEATURE_CORE1     0x020
#define MW_FEATURE_DEP_ONLY  0x040
#define MW_FEATURE_HIGH_FREQ 0x080
#define MW_FEATURE_WEAK_SEQ  0x100
74:
75:
76:
77:
78:
79:
/*
 * One metaword: the unit returned by anthy_get_nth_metaword() and passed
 * to anthy_commit_border().
 * NOTE(review): field semantics below are inferred from names and types in
 * this header; the code that fills them in is not visible here.
 */
struct meta_word {
    /* Presumably start position and length within the input string, in the
     * same units as the from/len arguments of anthy_get_nr_metaword(). */
    int from, len;

    /* Score of this metaword (scale not visible here). */
    int score;

    /* A second score; by name, related to the metaword's structure. */
    int struct_score;

    /* Presumably a value produced by anthy_dep_word_hash() -- confirm. */
    int dep_word_hash;
    /* Presumably an OR of MW_FEATURE_* masks -- confirm. */
    int mw_features;
    /* Word type (wtype_t); by name, that of the core word. */
    wtype_t core_wt;
    /* enum dep_class is declared outside this file. */
    enum dep_class dep_class;

    /* Segment class assigned to this metaword. */
    enum seg_class seg_class;
    /* Constraint-check status; see enum constraint_stat. */
    enum constraint_stat can_use;
    /* Kind of metaword; see enum metaword_type. */
    enum metaword_type type;
    /* Associated word list (only the struct tag is visible here). */
    struct word_list *wl;
    /* Links to other metawords; presumably the components of a
     * wrapping/compound metaword -- confirm. */
    struct meta_word *mw1, *mw2;
    /* By name, a hint string for candidate generation. */
    xstr cand_hint;

    /* By name, the number of parts making up this metaword. */
    int nr_parts;


    /* Link to another metaword; presumably the next node of a list. */
    struct meta_word *next;
};
104:
/*
 * Module-level setup and teardown of the splitter (per the names; the
 * implementations are not in this header).
 * NOTE(review): the meaning of the int returned by anthy_init_splitter()
 * is not visible here -- confirm the success/failure convention.
 */
int anthy_init_splitter(void);
void anthy_quit_splitter(void);
107:
108: void anthy_init_split_context(xstr *xs, struct splitter_context *, int is_reverse);
109:
110:
111:
112:
/*
 * Border handling on a splitter context.
 * NOTE(review): the exact meaning of from/from2/to and of nr/mw/len is not
 * derivable from this header -- confirm against the implementation.
 */

/* Mark segment borders in sc; from, from2 and to are positions. */
void anthy_mark_border(struct splitter_context *sc,
                       int from, int from2, int to);

/* Commit borders to sc; mw and len presumably point at arrays with nr
 * entries (one metaword pointer and one length each). */
void anthy_commit_border(struct splitter_context *sc,
                         int nr,
                         struct meta_word **mw,
                         int *len);

/* Release a splitter context (per the name). */
void anthy_release_split_context(struct splitter_context *c);
117:
118:
/*
 * Metaword lookup by position.
 * NOTE(review): inferred from the names -- the first function presumably
 * returns how many metawords cover (from, len) and the second returns the
 * nth of them; the index base and the out-of-range result are not visible
 * in this header.
 */
int anthy_get_nr_metaword(struct splitter_context *sc,
                          int from, int len);

struct meta_word *anthy_get_nth_metaword(struct splitter_context *sc,
                                         int from, int len,
                                         int nth);
122:
/*
 * Compute an int hash from the string xs (per the name).
 * NOTE(review): presumably the source of meta_word.dep_word_hash; the hash
 * range and algorithm are not visible in this header.
 */
int anthy_dep_word_hash(xstr *xs);
124:
125:
126: #endif