(linenum→info "unix/slp.c:2238")

anthy/9100e/calctrans/input_set.c

    1: /* 入力のセットを管理するコード
    2:  *
    3:  * Copyright (C) 2006 HANAOKA Toshiyuki
    4:  * Copyright (C) 2006-2007 TABATA Yusuke
    5:  *
    6:  * Special Thanks: Google Summer of Code Program 2006
    7:  *
    8:  */
    9: #include <stdio.h>
   10: #include <stdlib.h>
   11: #include <string.h>
   12: #include <math.h>
   13: #include "input_set.h"
   14: 
   15: #define HASH_SIZE 1024
   16: 
   17: struct int_map_node {
   18:   int key;
   19:   int val;
   20:   struct int_map_node *next;
   21: };
   22: 
   23: struct int_map {
   24:   /**/
   25:   int nr;
   26:   struct int_map_node *hash_head;
   27:   /**/
   28:   int array_size;
   29:   struct int_map_node **array;
   30: };
   31: 
   32: struct input_set {
   33:   /**/
   34:   struct input_line *lines;
   35:   struct input_line *buckets[HASH_SIZE];
   36:   /**/
   37:   struct int_map *feature_freq;
   38: };
   39: 
   40: static int
   41: line_hash(const int *ar, int nr)
   42: {
   43:   int i;
   44:   unsigned h = 0;
   45:   for (i = 0; i < nr; i++) {
   46:     h += ar[i];
   47:   }
   48:   return (h % HASH_SIZE);
   49: }
   50: 
   51: static struct input_line *
   52: find_same_line(struct input_set *is, int *features, int nr)
   53: {
   54:   struct input_line *il;
   55:   int h = line_hash(features, nr);
   56:   for (il = is->buckets[h]; il; il = il->next_in_hash) {
   57:     int i;
   58:     if (il->nr_features != nr) {
   59:       continue;
   60:     }
   61:     for (i = 0; i < nr; i++) {
   62:       if (il->features[i] != features[i]) {
   63:         break;
   64:       }
   65:     }
   66:     if (i >= nr) {
   67:       return il;
   68:     }
   69:   }
   70:   return NULL;
   71: }
   72: 
   73: static struct input_line *
   74: add_line(struct input_set *is, int *features, int nr)
   75: {
   76:   int i, h;
   77:   struct input_line *il;
   78:   il = malloc(sizeof(struct input_line));
   79:   il->nr_features = nr;
   80:   il->features = malloc(sizeof(int) * nr);
   81:   for (i = 0; i < nr; i++) {
   82:     il->features[i] = features[i];
   83:   }
   84:   il->weight = 0;
   85:   il->negative_weight = 0;
   86:   /* link */
   87:   il->next_line = is->lines;
   88:   is->lines = il;
   89:   /**/
   90:   h = line_hash(features, nr);
   91:   il->next_in_hash = is->buckets[h];
   92:   is->buckets[h] = il;
   93:   return il;
   94: }
   95: 
   96: static void
   97: add_feature_count(struct int_map *im, int nr, int *features, int weight)
   98: {
   99:   int i;
  100:   for (i = 0; i < nr; i++) {
  101:     int f = features[i];
  102:     int v = int_map_peek(im, f);
  103:     int_map_set(im, f, v + weight);
  104:   }
  105: }
  106: 
  107: /* input_setに入力を一つ加える */
  108: void
  109: input_set_set_features(struct input_set *is, int *features,
  110:                        int nr, int weight)
  111: {
  112:   struct input_line *il;
  113:   int abs_weight = abs(weight);
  114: 
  115:   /**/
  116:   il = find_same_line(is, features, nr);
  117:   if (!il) {
  118:     il = add_line(is, features, nr);
  119:   }
  120:   /**/
  121:   if (weight > 0) {
  122:     il->weight += weight;
  123:   } else {
  124:     il->negative_weight += abs_weight;
  125:   }
  126:   /**/
  127:   add_feature_count(is->feature_freq, nr, features, abs_weight);
  128: }
  129: 
  130: struct input_set *
  131: input_set_create(void)
  132: {
  133:   int i;
  134:   struct input_set *is;
  135:   is = malloc(sizeof(struct input_set));
  136:   is->lines = NULL;
  137:   /**/
  138:   for (i = 0; i < HASH_SIZE; i++) {
  139:     is->buckets[i] = NULL;
  140:   }
  141:   /**/
  142:   is->feature_freq = int_map_new();
  143:   /**/
  144:   return is;
  145: }
  146: 
  147: struct input_line *
  148: input_set_get_input_line(struct input_set *is)
  149: {
  150:   return is->lines;
  151: }
  152: 
  153: struct input_set *
  154: input_set_filter(struct input_set *is,
  155:                  double pos, double neg)
  156: {
  157:   struct input_set *new_is = input_set_create();
  158:   struct input_line *il;
  159:   for (il = is->lines; il; il = il->next_line) {
  160:     if (il->weight > pos ||
  161:         il->negative_weight > neg) {
  162:       input_set_set_features(new_is, il->features,
  163:                              il->nr_features, il->weight);
  164:       input_set_set_features(new_is, il->features,
  165:                              il->nr_features, -il->negative_weight);
  166:     }
  167:   }
  168:   return new_is;
  169: }
  170: 
  171: void
  172: input_set_output_feature_freq(FILE *fp, struct input_set *is)
  173: {
  174:   int i;
  175:   struct int_map *im = is->feature_freq;
  176:   fprintf(fp, "0 %d\n", im->array_size);
  177:   int_map_flatten(im);
  178:   for (i = 0; i < im->array_size; i++) {
  179:     if (!im->array[i]) {
  180:       fprintf(fp, "0 0\n");
  181:     } else {
  182:       fprintf(fp, "%d %d\n", im->array[i]->key,
  183:               im->array[i]->val);
  184:     }
  185:   }
  186: }
  187: 
  188: struct int_map *
  189: int_map_new(void)
  190: {
  191:   int i;
  192:   struct int_map *im = malloc(sizeof(struct int_map));
  193:   im->nr = 0;
  194:   im->hash_head = malloc(sizeof(struct int_map_node) * HASH_SIZE);
  195:   for (i = 0; i < HASH_SIZE; i++) {
  196:     im->hash_head[i].next = NULL;
  197:   }
  198:   return im;
  199: }
  200: 
  201: static int
  202: node_index(int idx)
  203: {
  204:   return idx % HASH_SIZE;
  205: }
  206: 
  207: static struct int_map_node *
  208: find_int_map_node(struct int_map *im, int idx)
  209: {
  210:   struct int_map_node *node;
  211:   int h = node_index(idx);
  212:   for (node = im->hash_head[h].next; node; node = node->next) {
  213:     if (node->key == idx) {
  214:       return node;
  215:     }
  216:   }
  217:   return NULL;
  218: }
  219: 
  220: int
  221: int_map_peek(struct int_map *im, int idx)
  222: {
  223:   struct int_map_node *node = find_int_map_node(im, idx);
  224:   if (node) {
  225:     return node->val;
  226:   }
  227:   return 0;
  228: }
  229: 
  230: void
  231: int_map_set(struct int_map *im, int idx, int val)
  232: {
  233:   struct int_map_node *node = find_int_map_node(im, idx);
  234:   int h;
  235:   if (node) {
  236:     node->val = val;
  237:     return ;
  238:   }
  239:   /**/
  240:   node = malloc(sizeof(struct int_map_node));
  241:   node->key = idx;
  242:   node->val = val;
  243:   h = node_index(idx);
  244:   node->next = im->hash_head[h].next;
  245:   im->hash_head[h].next = node;
  246:   /**/
  247:   im->nr ++;
  248: }
  249: 
  250: void
  251: int_map_flatten(struct int_map *im)
  252: {
  253:   int i;
  254:   struct int_map_node *node;
  255:   int max_n = 0;
  256:   /* 配列を準備する */
  257:   im->array_size = im->nr * 2;
  258:   im->array = malloc(sizeof(struct int_map_node *) * 
  259:                      im->array_size);
  260:   for (i = 0; i < im->array_size; i++) {
  261:     im->array[i] = NULL;
  262:   }
  263:   /* 要素を置いていく */
  264:   for (i = 0; i < HASH_SIZE; i++) {
  265:     for (node = im->hash_head[i].next; node; node = node->next) {
  266:       int n = 0;
  267:       while (1) {
  268:         int h;
  269:         h = node->key + n;
  270:         h %= im->array_size;
  271:         if (!im->array[h]) {
  272:           im->array[h] = node;
  273:           break;
  274:         }
  275:         /**/
  276:         n++;
  277:       }
  278:       if (n > max_n) {
  279:         max_n = n;
  280:       }
  281:     }
  282:   }
  283:   /**/
  284:   printf(" max_collision=%d\n", max_n);
  285: }
Syntax (Markdown)