(linenum→info "unix/slp.c:2238")

anthy/9100e/src-worddic/matrix.c

    1: /*
    2:  * 疎行列を扱うためのコード
    3:  *
    4:  * (1) 行列(sparse_matrix)のインスタンスを作成し行列の要素を設定する
    5:  * (2) 行列から行列イメージ(matrix_image)を作成する
    6:  *  *  行列イメージをnetwork byteorderでファイルに書き出す
    7:  * (3) 行列イメージを読み込み(or mmapする)要素にアクセスする
    8:  *
    9:  */
   10: /*
   11:  * sparse matrix crammer
   12:  *
   13:  *  sparse matrix storage uses following 2 sparse arrays
   14:  *   *array of row
   15:  *   *array of cells in a row
   16:  *
   17:  *(1/2)
   18:  * sparse row    crammed row
   19:  *  0:0                1:1
   20:  *  1:1     ---->>     3:1
   21:  *  2:0   hash(h)%m    7:1
   22:  *  3:1       /
   23:  *  4:0      /
   24:  *  5:0     /
   25:  *  6:0
   26:  *  7:1
   27:  *  8:0
   28:  *     (?:1 means non-all 0 row)
   29:  *(2/2)
   30:  * crammed row      cram        shift count
   31:  *  1:1    .    .    -> ..      shift 0
   32:  *  3:1 .   .        ->   ..    shift 2
   33:  *  7:1   .  .    .  ->     ... shift 4
   34:  *
   35:  *     contents of        |
   36:  *         matrix        \|/
   37:  *
   38:  *                      ....... unified array of (value.column) pair
   39:  *
   40:  *  matrix image
   41:  *   image[0] : length of hashed row array
   42:  *   image[1] : length of crammed cell array
   43:  *   image[2 ~ 2+image[0]-1] : hashed row array
   44:  *   image[2+image[0] ~ 2+image[0]+image[1]-1] : hashed row array
   45:  *
   46:  * Copyright (C) 2005 TABATA Yusuke
   47:  *
   48:  */
   49: /*
   50:   This library is free software; you can redistribute it and/or
   51:   modify it under the terms of the GNU Lesser General Public
   52:   License as published by the Free Software Foundation; either
   53:   version 2 of the License, or (at your option) any later version.
   54: 
   55:   This library is distributed in the hope that it will be useful,
   56:   but WITHOUT ANY WARRANTY; without even the implied warranty of
   57:   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   58:   Lesser General Public License for more details.
   59: 
   60:   You should have received a copy of the GNU Lesser General Public
   61:   License along with this library; if not, write to the Free Software
   62:   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
   63:  */
   64: #include <stdio.h>
   65: #include <stdlib.h>
   66: 
   67: #include <anthy/diclib.h>
   68: /* public APIs */
   69: #include <anthy/matrix.h>
   70: 
   71: /* maximum length allowed for hash chain */
   72: #define MAX_FAILURE 50
   73: 
   74: struct list_elm {
   75:   int index;
   76:   int value;
   77:   void *ptr;
   78:   struct list_elm *next;
   79:   /* bypass to mitigate O(n) insertion cost */
   80:   struct list_elm *orig_next;
   81: };
   82: 
   83: struct array_elm {
   84:   int index;
   85:   int value;
   86:   void *ptr;
   87: };
   88: 
   89: /*
   90:  * sparse array has two representation
   91:  *
   92:  *  (1) list and (2) hashed array
   93:  * build list first and sparse_array_make_array() to build hashed array
   94:  * this stores one value and one pointer
   95:  *
   96:  */
   97: struct sparse_array {
   98:   /* list representation */
   99:   int elm_count;
  100:   /* sorted */
  101:   struct list_elm head;
  102: 
  103:   /* array representation */
  104:   int array_len;
  105:   struct array_elm *array;
  106: };
  107: 
  108: static struct sparse_array *
  109: sparse_array_new(void)
  110: {
  111:   struct sparse_array *a = malloc(sizeof(struct sparse_array));
  112:   /**/
  113:   a->elm_count = 0;
  114:   a->head.next = NULL;
  115:   a->head.orig_next = NULL;
  116:   a->head.index = -1;
  117:   /**/
  118:   a->array_len = 0;
  119:   a->array = NULL;
  120:   return a;
  121: }
  122: 
  123: static void
  124: insert_elm_after(struct list_elm *elm, int idx, int val, void *ptr)
  125: {
  126:   struct list_elm *new_elm = malloc(sizeof(struct list_elm));
  127:   new_elm->value = val;
  128:   new_elm->index = idx;
  129:   new_elm->ptr = ptr;
  130:   /**/
  131:   new_elm->next = elm->next;
  132:   new_elm->orig_next = elm->next;
  133:   elm->next = new_elm;
  134: }
  135: 
  136: static void
  137: sparse_array_set(struct sparse_array *sa, int idx, int val, void *ptr)
  138: {
  139:   struct list_elm *e;
  140:   e = &sa->head;
  141:   while (e) {
  142:     if (e->index == idx) {
  143:       /* find same index and update */
  144:       e->value = val;
  145:       e->ptr = ptr;
  146:       return ;
  147:     }
  148:     /* search */
  149:     if (e->index < idx && (!e->next || idx < e->next->index)) {
  150:       insert_elm_after(e, idx, val, ptr);
  151:       /**/
  152:       sa->elm_count ++;
  153:       return ;
  154:     }
  155:     /* go next */
  156:     if (e->orig_next && e->orig_next->index < idx) {
  157:       /* leap ahead */
  158:       e = e->orig_next;
  159:     } else {
  160:       e = e->next;
  161:     }
  162:   }
  163: }
  164: 
  165: static int
  166: hash(int val, int max, int nth)
  167: {
  168:   val += nth * 113;
  169:   if (val < 0) {
  170:     val = -val;
  171:   }
  172:   if (max == 0) {
  173:     return 0;
  174:   }
  175:   return val % max;
  176: }
  177: 
  178: static int
  179: sparse_array_try_make_array(struct sparse_array *s)
  180: {
  181:   int i;
  182:   struct list_elm *e;
  183:   /* initialize */
  184:   free(s->array);
  185:   s->array = malloc(sizeof(struct array_elm) * s->array_len);
  186:   for (i = 0; i < s->array_len; i++) {
  187:     s->array[i].index = -1;
  188:   }
  189: 
  190:   /* push */
  191:   for (e = s->head.next; e; e = e->next) {
  192:     int ok = 0;
  193:     int n = 0;
  194:     do {
  195:       int h = hash(e->index, s->array_len, n);
  196:       if (s->array[h].index == -1) {
  197:         /* find unused element in this array */
  198:         ok = 1;
  199:         s->array[h].index = e->index;
  200:         s->array[h].value = e->value;
  201:         s->array[h].ptr = e->ptr;
  202:       } else {
  203:         /* collision */
  204:         n ++;
  205:         if (n > MAX_FAILURE) {
  206:           /* too much collision */
  207:           return 1;
  208:         }
  209:       }
  210:     } while (!ok);
  211:   }
  212:   return 0;
  213: }
  214: 
  215: static void
  216: sparse_array_make_array(struct sparse_array *s)
  217: {
  218:   /* estimate length */
  219:   if (s->elm_count == 1) {
  220:     s->array_len = 1;
  221:   } else {
  222:     s->array_len = s->elm_count;
  223:   }
  224:   while (sparse_array_try_make_array(s)) {
  225:     /* expand a little */
  226:     s->array_len ++;
  227:     s->array_len *= 9;
  228:     s->array_len /= 8;
  229:   }
  230: }
  231: 
  232: static struct array_elm *
  233: sparse_array_get(struct sparse_array *s, int index, struct array_elm *arg)
  234: {
  235:   if (s->array) {
  236:     int n = 0;
  237:     while (1) {
  238:       int h = hash(index, s->array_len, n);
  239:       if (s->array[h].index == index) {
  240:         *arg = s->array[h];
  241:         return arg;
  242:       }
  243:       n ++;
  244:       if (n == MAX_FAILURE) {
  245:         return NULL;
  246:       }
  247:     }
  248:   } else {
  249:     struct list_elm *e = e = s->head.next;
  250:     while (e) {
  251:       if (e->index == index) {
  252:         arg->value = e->value;
  253:         arg->ptr = e->ptr;
  254:         return arg;
  255:       }
  256:       /* go next */
  257:       if (e->orig_next && e->orig_next->index < index) {
  258:         /* leap ahead */
  259:         e = e->orig_next;
  260:       } else {
  261:         e = e->next;
  262:       }
  263:     }
  264:     return NULL;
  265:   }
  266: }
  267: 
  268: static int
  269: sparse_array_get_int(struct sparse_array *s, int index)
  270: {
  271:   struct array_elm elm;
  272:   if (sparse_array_get(s, index, &elm)) {
  273:     return elm.value;
  274:   }
  275:   return 0;
  276: }
  277: 
  278: static void *
  279: sparse_array_get_ptr(struct sparse_array *s, int index)
  280: {
  281:   struct array_elm elm;
  282:   if (sparse_array_get(s, index, &elm)) {
  283:     return elm.ptr;
  284:   }
  285:   return NULL;
  286: }
  287: 
  288: /**/
  289: struct sparse_matrix {
  290:   /**/
  291:   struct sparse_array *row_array;
  292:   /* image information */
  293:   int nr_rows;
  294:   int array_length;
  295: };
  296: 
  297: /* API */
  298: struct sparse_matrix *
  299: anthy_sparse_matrix_new()
  300: {
  301:   struct sparse_matrix *m = malloc(sizeof(struct sparse_matrix));
  302:   m->row_array = sparse_array_new();
  303:   m->nr_rows = 0;
  304:   return m;
  305: }
  306: 
  307: static struct sparse_array *
  308: find_row(struct sparse_matrix *m, int row, int create)
  309: {
  310:   struct sparse_array *a;
  311:   a = sparse_array_get_ptr(m->row_array, row);
  312:   if (a) {
  313:     return a;
  314:   }
  315:   if (!create) {
  316:     return NULL;
  317:   }
  318:   /* allocate a new row */
  319:   a = sparse_array_new();
  320:   sparse_array_set(m->row_array, row, 0, a);
  321:   m->nr_rows ++;
  322:   return a;
  323: }
  324: 
  325: /* API */
  326: void
  327: anthy_sparse_matrix_set(struct sparse_matrix *m, int row, int column,
  328:                         int value, void *ptr)
  329: {
  330:   struct sparse_array *a;
  331:   a = find_row(m, row, 1);
  332:   sparse_array_set(a, column, value, ptr);
  333: }
  334: 
  335: /* API */
  336: int
  337: anthy_sparse_matrix_get_int(struct sparse_matrix *m, int row, int column)
  338: {
  339:   struct sparse_array *a;
  340:   struct list_elm *e;
  341:   a = find_row(m, row, 1);
  342:   if (!a) {
  343:     return 0;
  344:   }
  345:   for (e = &a->head; e; e = e->next) {
  346:     if (e->index == column) {
  347:       return e->value;
  348:     }
  349:   }
  350:   return 0;
  351: }
  352: 
  353: /* API */
  354: void
  355: anthy_sparse_matrix_make_matrix(struct sparse_matrix *m)
  356: {
  357:   struct array_elm *ae;
  358:   int i;
  359:   int offset = 0;
  360:   /**/
  361:   sparse_array_make_array(m->row_array);
  362:   /**/
  363:   for (i = 0; i < m->row_array->array_len; i++) {
  364:     struct sparse_array *row;
  365:     ae = &m->row_array->array[i];
  366:     /**/
  367:     ae->value = offset;
  368:     if (ae->index == -1) {
  369:       continue;
  370:     }
  371:     /**/
  372:     row = ae->ptr;
  373:     sparse_array_make_array(row);
  374:     offset += row->array_len;
  375:   }
  376:   m->array_length = offset;
  377: }
  378: 
  379: /* API */
  380: struct matrix_image *
  381: anthy_matrix_image_new(struct sparse_matrix *s)
  382: {
  383:   struct matrix_image *mi;
  384:   int i;
  385:   int offset;
  386:   /**/
  387:   mi = malloc(sizeof(struct matrix_image));
  388:   mi->size = 2 + s->row_array->array_len * 2 + s->array_length * 2;
  389:   mi->image = malloc(sizeof(int) * mi->size);
  390:   mi->image[0] = s->row_array->array_len;
  391:   mi->image[1] = s->array_length;
  392:   /* row index */
  393:   offset = 2;
  394:   for (i = 0; i < s->row_array->array_len; i++) {
  395:     struct array_elm *ae;
  396:     ae = &s->row_array->array[i];
  397:     mi->image[offset + i*2] = ae->index;
  398:     mi->image[offset + i*2 + 1] = ae->value;
  399:   }
  400:   /* cells */
  401:   offset = 2 + s->row_array->array_len * 2;
  402:   for (i = 0; i < s->row_array->array_len; i++) {
  403:     struct array_elm *ae;
  404:     struct sparse_array *sa;
  405:     int j;
  406:     ae = &s->row_array->array[i];
  407:     if (ae->index == -1) {
  408:       continue;
  409:     }
  410:     sa = ae->ptr;
  411:     if (!sa) {
  412:       continue;
  413:     }
  414:     for (j = 0; j < sa->array_len; j++) {
  415:       struct array_elm *cell = &sa->array[j];
  416:       mi->image[offset] = cell->index;
  417:       if (cell->index == -1) {
  418:         mi->image[offset + 1] = -1;
  419:       } else {
  420:         mi->image[offset + 1] = cell->value;
  421:       }
  422:       offset += 2;
  423:     }
  424:   }
  425:   /**/
  426:   return mi;
  427: }
  428: 
  429: static int
  430: read_int(int *image, int idx, int en)
  431: {
  432:   if (en) {
  433:     return anthy_dic_ntohl(image[idx]);
  434:   }
  435:   return image[idx];
  436: }
  437: 
  438: static int
  439: do_matrix_peek(int *image, int row, int col, int en)
  440: {
  441:   int n, h, shift, next_shift;
  442:   int row_array_len = read_int(image, 0, en);
  443:   int column_array_len;
  444:   int cell_offset;
  445: 
  446:   /* find row */
  447:   if (row_array_len == 0) {
  448:     return 0;
  449:   }
  450:   for (n = 0; ; n++) {
  451:     h = hash(row, row_array_len, n);
  452:     if (read_int(image, 2+ h * 2, en) == row) {
  453:       shift = read_int(image, 2+h*2+1, en);
  454:       break;
  455:     }
  456:     if (read_int(image, 2+ h * 2, en) == -1) {
  457:       return 0;
  458:     }
  459:     if (n > MAX_FAILURE) {
  460:       return 0;
  461:     }
  462:   }
  463: 
  464:   /* find shift count of next row */
  465:   if (h == row_array_len - 1) {
  466:     /* last one */
  467:     next_shift = read_int(image, 1, en);
  468:   } else {
  469:     /* not last one */
  470:     next_shift = read_int(image, 2+h*2+2+1, en);
  471:   }
  472: 
  473:   /* crammed width of this row */
  474:   column_array_len = next_shift - shift;
  475: 
  476:   /* cells in this image */
  477:   cell_offset = 2 + row_array_len * 2;
  478:   for (n = 0; ; n++) {
  479:     h = hash(col, column_array_len, n);
  480:     if (read_int(image, cell_offset + shift * 2+ h * 2, en) == col) {
  481:       return read_int(image, cell_offset + shift * 2 + h*2+1, en);
  482:     }
  483:     if (read_int(image, cell_offset + shift * 2+ h * 2, en) == -1) {
  484:       /* not exist */
  485:       return 0;
  486:     }
  487:     if (n > MAX_FAILURE) {
  488:       return 0;
  489:     }
  490:   }
  491:   return 0;
  492: }
  493: 
  494: /* API */
  495: int
  496: anthy_matrix_image_peek(int *image, int row, int col)
  497: {
  498:   if (!image) {
  499:     return 0;
  500:   }
  501:   return do_matrix_peek(image, row, col, 1);
  502: }
  503: 
  504: #ifdef DEBUG
  505: /* for debug purpose */
  506: static void
  507: sparse_array_dump(struct sparse_array *s)
  508: {
  509:   struct list_elm *e;
  510:   int i;
  511:   printf("list(%d):", s->elm_count);
  512:   for (e = s->head.next; e; e = e->next) {
  513:     printf(" %d:%d(%x)", e->index, e->value, (unsigned long)e->ptr);
  514:   }
  515:   printf("\n");
  516:   if (!s->array) {
  517:     return ;
  518:   }
  519:   printf("array(%d):", s->array_len);
  520:   for (i = 0; i < s->array_len; i ++) {
  521:     struct array_elm *ae = &s->array[i];
  522:     if (ae->index != -1) {
  523:       printf(" %d:%d,%d(%x)", i, ae->index, ae->value, (unsigned long)ae->ptr);
  524:     }
  525:   }
  526:   printf("\n");
  527:   return ;
  528:   /**/
  529: }
  530: 
  531: /* for debug purpose */
  532: void
  533: sparse_matrix_dump(struct sparse_matrix *m)
  534: {
  535:   struct list_elm *e;
  536:   struct array_elm *ae;
  537:   int i, offset;
  538:   if (!m->row_array) {
  539:     for (e = m->row_array->head.next; e; e = e->next) {
  540:       sparse_array_dump(e->ptr);
  541:     }
  542:     return ;
  543:   }
  544:   printf("\nnumber of row=%d, row array size=%d, cell array size=%d\n\n",
  545:          m->nr_rows, m->row_array->array_len, m->array_length);
  546:   /* row part */
  547:   for (i = 0; i < m->row_array->array_len; i++) {
  548:     struct array_elm *ae;
  549:     ae = &m->row_array->array[i];
  550:     if (ae->index != -1) {
  551:       printf(" [%d] row=%d, shift=%d\n", i, ae->index, ae->value);
  552:     }
  553:   }
  554:   printf("\n");
  555:   offset = 0;
  556:   for (i = 0; i < m->row_array->array_len; i++) {
  557:     struct array_elm *ae;
  558:     struct sparse_array *sa;
  559:     int j;
  560:     ae = &m->row_array->array[i];
  561:     sa = ae->ptr;
  562:     if (!sa) {
  563:       continue;
  564:     }
  565:     for (j = 0; j < sa->array_len; j++) {
  566:       struct array_elm *cell = &sa->array[j];
  567:       if (cell->index != -1) {
  568:         printf("  [%d] column=%d, value=%d\n", offset, cell->index, cell->value);
  569:       }
  570:       offset ++;
  571:     }
  572:   }
  573:   printf("\n");
  574: }
  575: #<