
1: /* 2: * 文節の構造metawordをソートする 3: * 4: * 文節に対する複数の構造の候補をソートする 5: * 6: * Copyright (C) 2000-2007 TABATA Yusuke 7: * 8: */ 9: /* 10: This library is free software; you can redistribute it and/or 11: modify it under the terms of the GNU Lesser General Public 12: License as published by the Free Software Foundation; either 13: version 2 of the License, or (at your option) any later version. 14: 15: This library is distributed in the hope that it will be useful, 16: but WITHOUT ANY WARRANTY; without even the implied warranty of 17: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18: Lesser General Public License for more details. 19: 20: You should have received a copy of the GNU Lesser General Public 21: License along with this library; if not, write to the Free Software 22: Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23: */ 24: #include <stdlib.h> 25: #include <math.h> 26: 27: #include <anthy/segment.h> 28: #include <anthy/ordering.h> 29: #include <anthy/feature_set.h> 30: #include <anthy/splitter.h> 31: #include <anthy/diclib.h> 32: #include "sorter.h" 33: 34: static void *cand_info_array; 35: 36: static double 37: calc_probability(struct feature_list *fl) 38: { 39: struct feature_freq *res, arg; 40: res = anthy_find_feature_freq(cand_info_array, 41: fl, &arg); 42: if (res) { 43: double pos = (double)res->f[15]; 44: double neg = (double)res->f[14]; 45: double prob = pos / (pos + neg); 46: prob = prob * prob; 47: /**/ 48: return prob; 49: } 50: return 0; 51: } 52: 53: static void 54: mw_eval(struct seg_ent *prev_seg, struct seg_ent *seg, 55: struct meta_word *mw) 56: { 57: int pc; 58: struct feature_list fl; 59: double prob; 60: (void)seg; 61: anthy_feature_list_init(&fl); 62: /**/ 63: anthy_feature_list_set_cur_class(&fl, mw->seg_class); 64: anthy_feature_list_set_dep_word(&fl, mw->dep_word_hash); 65: anthy_feature_list_set_dep_class(&fl, mw->dep_class); 66: anthy_feature_list_set_mw_features(&fl, mw->mw_features); 67: /* 前の文節の素性 */ 68: if (prev_seg) { 69: pc = prev_seg->best_seg_class; 70: } else { 71: pc = SEG_HEAD; 72: } 73: anthy_feature_list_set_class_trans(&fl, pc, mw->seg_class); 74: anthy_feature_list_sort(&fl); 75: /* 計算する */ 76: prob = 0.1 + calc_probability(&fl); 77: if (prob < 0) { 78: prob = (double)1 / (double)1000; 79: } 80: anthy_feature_list_free(&fl); 81: mw->struct_score = RATIO_BASE * RATIO_BASE; 82: mw->struct_score *= prob; 83: /* 84: anthy_feature_list_print(&fl); 85: printf(" prob=%f, struct_score=%d\n", prob, mw->struct_score); 86: */ 87: 88: /**/ 89: if (mw->mw_features & MW_FEATURE_SUFFIX) { 90: mw->struct_score /= 2; 91: } 92: if (mw->mw_features & MW_FEATURE_WEAK_CONN) { 93: mw->struct_score /= 10; 94: } 95: } 96: 97: static void 98: seg_eval(struct seg_ent *prev_seg, 99: struct seg_ent *seg) 100: { 101: int i; 102: for (i = 0; i < seg->nr_metaword; i++) { 103: mw_eval(prev_seg, seg, seg->mw_array[i]); 104: } 105: } 106: 107: static void 108: sl_eval(struct segment_list *seg_list) 109: { 110: int i; 111: struct seg_ent *prev_seg = NULL; 112: for (i = 0; i < seg_list->nr_segments; i++) { 113: struct seg_ent *seg; 114: seg = anthy_get_nth_segment(seg_list, i); 115: seg_eval(prev_seg, seg); 116: prev_seg = seg; 117: } 118: } 119: 120: static int 121: metaword_compare_func(const void *p1, const void *p2) 122: { 123: const struct meta_word * const *s1 = p1; 124: const struct meta_word * const *s2 = p2; 125: return (*s2)->struct_score - (*s1)->struct_score; 126: } 127: 128: void 129: anthy_sort_metaword(struct segment_list *seg_list) 130: { 131: int i; 132: /**/ 133: sl_eval(seg_list); 134: /**/ 135: for (i = 0; i < seg_list->nr_segments; i++) { 136: struct seg_ent *seg = anthy_get_nth_segment(seg_list, i); 137: qsort(seg->mw_array, seg->nr_metaword, sizeof(struct meta_word *), 138: metaword_compare_func); 139: } 140: } 141: 142: void 143: anthy_infosort_init(void) 144: { 145: cand_info_array = anthy_file_dic_get_section("cand_info"); 146: }knok: anthy/9100e/src-ordering/infosort.c:142-146 on Thu Feb 28 18:00:53 +0900 2008候補の順序付け情報を取り扱うようだ(要確認)