1:
2: #ifndef _wordborder_h_included_
3: #define _wordborder_h_included_
4:
5:
6: #include <anthy/dic.h>
7: #include <anthy/alloc.h>
8: #include <anthy/segclass.h>
9: #include <anthy/depgraph.h>
10:
11: struct splitter_context;
12:
13:
14:
15:
/*
 * Kind of check associated with a meta-word type (see the `check` column of
 * anthy_metaword_type_tab).
 *
 * The numeric values are spelled out so that the ordering the rest of the
 * code was compiled against is visible at a glance; they are identical to
 * the values the compiler would assign implicitly.
 *
 * NOTE(review): the exact meaning of each check is defined by the code that
 * consumes this enum, which is not visible from this header.
 */
enum mw_check {
  MW_CHECK_NONE     = 0,  /* no check */
  MW_CHECK_SINGLE   = 1,
  MW_CHECK_BORDER   = 2,
  MW_CHECK_WRAP     = 3,
  MW_CHECK_OCHAIRE  = 4,
  MW_CHECK_NUMBER   = 5,
  MW_CHECK_COMPOUND = 6
};
27:
28:
29:
30:
31:
/* Only pointers to these are stored below, so incomplete types suffice. */
struct meta_word;
struct word_list;

/*
 * Per-node bookkeeping referenced from struct word_split_info_cache::cnode.
 *
 * NOTE(review): presumably there is one node per character position of the
 * string being split, and mw/wl are heads of linked lists -- confirm
 * against the splitter implementation.
 */
struct char_node {
  int max_len;            /* presumably the longest span recorded for this node */
  struct meta_word *mw;   /* meta_word(s) attached to this node */
  struct word_list *wl;   /* word_list(s) attached to this node */
};
37:
38:
39:
40:
41:
42: struct word_split_info_cache {
43: struct char_node *cnode;
44:
45:
46:
47: int *seq_len;
48:
49: int *rev_seq_len;
50:
51: int *seg_border;
52:
53: enum seg_class* best_seg_class;
54:
55: struct meta_word **best_mw;
56:
57: allocator MwAllocator, WlAllocator;
58: };
59:
60:
61:
62:
/*
 * Status associated with a meta-word type (see the `status` column of
 * anthy_metaword_type_tab).
 *
 * Values are written explicitly and match the implicit numbering of the
 * enumerators in declaration order.
 *
 * NOTE(review): the descriptions of each status live in the code that
 * builds meta_words; only the names are visible here.
 */
enum mw_status {
  MW_STATUS_NONE          = 0,
  MW_STATUS_WRAPPED       = 1,
  MW_STATUS_COMBINED      = 2,
  MW_STATUS_COMPOUND      = 3,
  MW_STATUS_COMPOUND_PART = 4,
  MW_STATUS_OCHAIRE       = 5
};
76:
77:
78:
79:
80: extern struct metaword_type_tab_ {
81: enum metaword_type type;
82: const char *name;
83: enum mw_status status;
84: enum mw_check check;
85: } anthy_metaword_type_tab[];
86:
87:
88:
89:
90:
91:
/*
 * Indices into word_list::part[], followed by the number of parts.
 * The count is derived from the last index so the two cannot drift apart.
 */
#define PART_PREFIX   0
#define PART_CORE     1
#define PART_POSTFIX  2
#define PART_DEPWORD  3
#define NR_PARTS      (PART_DEPWORD + 1)
97:
98: struct part_info {
99:
100: int from, len;
101:
102: wtype_t wt;
103: seq_ent_t seq;
104:
105: int freq;
106:
107: enum dep_class dc;
108: };
109:
110:
111:
112:
113:
114: struct word_list {
115:
116: int from, len;
117: int is_compound;
118:
119:
120: int dep_word_hash;
121: int mw_features;
122:
123: enum seg_class seg_class;
124: enum constraint_stat can_use;
125:
126:
127: int head_pos;
128: int tail_ct;
129:
130:
131: int last_part;
132: struct part_info part[NR_PARTS];
133:
134:
135: int node_id;
136:
137:
138: struct word_list *next;
139: };
140:
141:
142:
/*
 * Splitter debug flags. Every non-zero flag occupies its own bit, so the
 * flags can be OR-ed together into one mask.
 *
 * NOTE(review): what each flag enables is decided by the code that tests
 * these bits, which is not visible from this header.
 */
#define SPLITTER_DEBUG_NONE  0x00
#define SPLITTER_DEBUG_WL    0x01
#define SPLITTER_DEBUG_MW    0x02
#define SPLITTER_DEBUG_LN    0x04
#define SPLITTER_DEBUG_ID    0x08
#define SPLITTER_DEBUG_CAND  0x10

/* Returns the debug flags; presumably a mask of the SPLITTER_DEBUG_* bits. */
int anthy_splitter_debug_flags(void);
156:
157:
158:
159:
160: void anthy_scan_node(struct splitter_context *sc,
161: struct word_list *wl,
162: xstr *follow, int node);
163: int anthy_get_node_id_by_name(const char *name);
164: int anthy_init_depword_tab(void);
165: void anthy_quit_depword_tab(void);
166:
167:
/* Returns the number of dependency rules. */
int anthy_get_nr_dep_rule(void);

/*
 * Fetches one dependency rule into the caller-provided wordseq_rule.
 * NOTE(review): the int selects the rule; its valid range is presumably
 * bounded by anthy_get_nr_dep_rule() -- confirm at the definition.
 */
void anthy_get_nth_dep_rule(int, struct wordseq_rule *);
170:
171:
/* word_list handling. All functions operate on the splitter context `sc`. */

/* Commits `wl` to the context. */
void anthy_commit_word_list(struct splitter_context *sc,
                            struct word_list *wl);

/*
 * Obtains a new word_list for the context.
 * NOTE(review): presumably served from the context's allocator rather
 * than individually freed -- confirm ownership at the definition.
 */
struct word_list *anthy_alloc_word_list(struct splitter_context *sc);

/* Prints `wl` (debugging aid). */
void anthy_print_word_list(struct splitter_context *sc,
                           struct word_list *wl);

/* Builds all word_lists for the context. */
void anthy_make_word_list_all(struct splitter_context *sc);
176:
177:
/* meta_word handling. All functions operate on the splitter context `sc`. */

/* Commits `mw` to the context. */
void anthy_commit_meta_word(struct splitter_context *sc,
                            struct meta_word *mw);

/* Builds all meta_words for the context. */
void anthy_make_metaword_all(struct splitter_context *sc);

/* Prints `mw` (debugging aid). */
void anthy_print_metaword(struct splitter_context *sc,
                          struct meta_word *mw);

/* Marks borders in the context according to `mw`. */
void anthy_mark_border_by_metaword(struct splitter_context *sc,
                                   struct meta_word *mw);
184:
185:
186:
/*
 * Evaluates borders for the context.
 * NOTE(review): the three ints are unnamed in this declaration; they are
 * presumably positions in the string being split -- take the real names
 * from the definition before relying on their order.
 */
void anthy_eval_border(struct splitter_context *sc, int, int, int);

/* Marks borders in the range given by `from` and `to`. */
void anthy_mark_borders(struct splitter_context *sc,
                        int from,
                        int to);

/* Sets the segment class of `wl`. */
void anthy_set_seg_class(struct word_list *wl);
194:
195:
196: extern wtype_t anthy_wtype_noun;
197: extern wtype_t anthy_wtype_name_noun;
198: extern wtype_t anthy_wtype_num_noun;
199: extern wtype_t anthy_wtype_prefix;
200: extern wtype_t anthy_wtype_num_prefix;
201: extern wtype_t anthy_wtype_num_postfix;
202: extern wtype_t anthy_wtype_name_postfix;
203: extern wtype_t anthy_wtype_sv_postfix;
204: extern wtype_t anthy_wtype_a_tail_of_v_renyou;
205: extern wtype_t anthy_wtype_v_renyou;
206: extern wtype_t anthy_wtype_noun_tail;
207: extern wtype_t anthy_wtype_n1;
208: extern wtype_t anthy_wtype_n10;
209:
210: #endif