(linenum→info "unix/slp.c:2238")

anthy/9100e/src-ordering/infosort.c

    1: /*
    2:  * 文節の構造metawordをソートする
    3:  *
    4:  * 文節に対する複数の構造の候補をソートする
    5:  *
    6:  * Copyright (C) 2000-2007 TABATA Yusuke
    7:  *
    8:  */
    9: /*
   10:   This library is free software; you can redistribute it and/or
   11:   modify it under the terms of the GNU Lesser General Public
   12:   License as published by the Free Software Foundation; either
   13:   version 2 of the License, or (at your option) any later version.
   14: 
   15:   This library is distributed in the hope that it will be useful,
   16:   but WITHOUT ANY WARRANTY; without even the implied warranty of
   17:   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   18:   Lesser General Public License for more details.
   19: 
   20:   You should have received a copy of the GNU Lesser General Public
   21:   License along with this library; if not, write to the Free Software
   22:   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
   23:  */
   24: #include <stdlib.h>
   25: #include <math.h>
   26: 
   27: #include <anthy/segment.h>
   28: #include <anthy/ordering.h>
   29: #include <anthy/feature_set.h>
   30: #include <anthy/splitter.h>
   31: #include <anthy/diclib.h>
   32: #include "sorter.h"
   33: 
   34: static void *cand_info_array; /* "cand_info" section handle; set by anthy_infosort_init(), read by calc_probability() */
   35: 
   36: static double
   37: calc_probability(struct feature_list *fl)
   38: {
   39:   struct feature_freq *res, arg;
   40:   res = anthy_find_feature_freq(cand_info_array,
   41:                                 fl, &arg);
   42:   if (res) {
   43:     double pos = (double)res->f[15];
   44:     double neg = (double)res->f[14];
   45:     double prob = pos / (pos + neg);
   46:     prob = prob * prob;
   47:     /**/
   48:     return prob;
   49:   }
   50:   return 0;
   51: }
   52: 
   53: static void
   54: mw_eval(struct seg_ent *prev_seg, struct seg_ent *seg,
   55:         struct meta_word *mw)
   56: {
   57:   int pc;
   58:   struct feature_list fl;
   59:   double prob;
   60:   (void)seg;
   61:   anthy_feature_list_init(&fl);
   62:   /**/
   63:   anthy_feature_list_set_cur_class(&fl, mw->seg_class);
   64:   anthy_feature_list_set_dep_word(&fl, mw->dep_word_hash);
   65:   anthy_feature_list_set_dep_class(&fl, mw->dep_class);
   66:   anthy_feature_list_set_mw_features(&fl, mw->mw_features);
   67:   /* 前の文節の素性 */
   68:   if (prev_seg) {
   69:     pc = prev_seg->best_seg_class;
   70:   } else {
   71:     pc = SEG_HEAD;
   72:   }
   73:   anthy_feature_list_set_class_trans(&fl, pc, mw->seg_class);
   74:   anthy_feature_list_sort(&fl);
   75:   /* 計算する */
   76:   prob = 0.1 + calc_probability(&fl);
   77:   if (prob < 0) {
   78:     prob = (double)1 / (double)1000;
   79:   }
   80:   anthy_feature_list_free(&fl);
   81:   mw->struct_score = RATIO_BASE * RATIO_BASE;
   82:   mw->struct_score *= prob;
   83:   /*
   84:   anthy_feature_list_print(&fl);
   85:   printf(" prob=%f, struct_score=%d\n", prob, mw->struct_score);
   86:   */
   87: 
   88:   /**/
   89:   if (mw->mw_features & MW_FEATURE_SUFFIX) {
   90:     mw->struct_score /= 2;
   91:   }
   92:   if (mw->mw_features & MW_FEATURE_WEAK_CONN) {
   93:     mw->struct_score /= 10;
   94:   }
   95: }
   96: 
   97: static void
   98: seg_eval(struct seg_ent *prev_seg,
   99:          struct seg_ent *seg)
  100: {
  101:   int i;
  102:   for (i = 0; i < seg->nr_metaword; i++) {
  103:     mw_eval(prev_seg, seg, seg->mw_array[i]);
  104:   }
  105: }
  106: 
  107: static void
  108: sl_eval(struct segment_list *seg_list)
  109: {
  110:   int i;
  111:   struct seg_ent *prev_seg = NULL;
  112:   for (i = 0; i < seg_list->nr_segments; i++) {
  113:     struct seg_ent *seg;
  114:     seg = anthy_get_nth_segment(seg_list, i);
  115:     seg_eval(prev_seg, seg);
  116:     prev_seg = seg;
  117:   }
  118: }
  119: 
  120: static int
  121: metaword_compare_func(const void *p1, const void *p2)
  122: {
  123:   const struct meta_word * const *s1 = p1;
  124:   const struct meta_word * const *s2 = p2;
  125:   return (*s2)->struct_score - (*s1)->struct_score;
  126: }
  127: 
  128: void
  129: anthy_sort_metaword(struct segment_list *seg_list)
  130: {
  131:   int i;
  132:   /**/
  133:   sl_eval(seg_list);
  134:   /**/
  135:   for (i = 0; i < seg_list->nr_segments; i++) {
  136:     struct seg_ent *seg = anthy_get_nth_segment(seg_list, i);
  137:     qsort(seg->mw_array, seg->nr_metaword, sizeof(struct meta_word *),
  138:           metaword_compare_func);
  139:   }
  140: }
  141: 
  142: void
  143: anthy_infosort_init(void)
  144: {
  145:   cand_info_array = anthy_file_dic_get_section("cand_info");
  146: }
Permalink to this note knok: anthy/9100e/src-ordering/infosort.c:142-146 on Thu Feb 28 18:00:53 +0900 2008

候補の順序付け情報を取り扱うようだ(要確認)

Syntax (Markdown)