(linenum→info "unix/slp.c:2238")

ruby/1.9.0/marshal.c

    1: /**********************************************************************
    2: 
    3:   marshal.c -
    4: 
    5:   $Author: matz $
    6:   $Date: 2007-11-22 10:17:52 +0900 (Thu, 22 Nov 2007) $
    7:   created at: Thu Apr 27 16:30:01 JST 1995
    8: 
    9:   Copyright (C) 1993-2007 Yukihiro Matsumoto
   10: 
   11: **********************************************************************/
   12: 
   13: #include "ruby/ruby.h"
   14: #include "ruby/io.h"
   15: #include "ruby/st.h"
   16: #include "ruby/util.h"
   17: #include "ruby/encoding.h"
   18: 
   19: #include <math.h>
   20: #ifdef HAVE_FLOAT_H
   21: #include <float.h>
   22: #endif
   23: #ifdef HAVE_IEEEFP_H
   24: #include <ieeefp.h>
   25: #endif
   26: 
   27: #define BITSPERSHORT (2*CHAR_BIT)   /* bit width of a two-char unit */
   28: #define SHORTMASK ((1<<BITSPERSHORT)-1)   /* mask with the low BITSPERSHORT bits set */
   29: #define SHORTDN(x) RSHIFT(x,BITSPERSHORT)   /* x shifted right by one such unit */
   30: 
   31: #if SIZEOF_SHORT == SIZEOF_BDIGITS
   32: #define SHORTLEN(x) (x)
   33: #else
   34: static int
   35: shortlen(long len, BDIGIT *ds)
   36: {
   37:     BDIGIT num;
   38:     int offset = 0;
   39: 
   40:     num = ds[len-1];
   41:     while (num) {
   42:         num = SHORTDN(num);
   43:         offset++;
   44:     }
   45:     return (len - 1)*sizeof(BDIGIT)/2 + offset;
   46: }
   47: #define SHORTLEN(x) shortlen((x),d)
   48: #endif
   49: 
   50: #define MARSHAL_MAJOR   4
   51: #define MARSHAL_MINOR   8
   52: 
   53: #define TYPE_NIL        '0'   /* tag byte w_object writes for nil */
   54: #define TYPE_TRUE       'T'   /* tag byte w_object writes for true */
   55: #define TYPE_FALSE      'F'   /* tag byte w_object writes for false */
   56: #define TYPE_FIXNUM     'i'   /* tag byte for a Fixnum; the value follows via w_long */
   57: 
   58: #define TYPE_EXTENDED   'e'   /* w_extended: precedes the class name of each T_ICLASS entry */
   59: #define TYPE_UCLASS     'C'   /* w_uclass: precedes the class path when the real class differs from `super' */
   60: #define TYPE_OBJECT     'o'
   61: #define TYPE_DATA       'd'
   62: #define TYPE_USERDEF    'u'
   63: #define TYPE_USRMARSHAL 'U'
   64: #define TYPE_FLOAT      'f'
   65: #define TYPE_BIGNUM     'l'
   66: #define TYPE_STRING     '"'
   67: #define TYPE_REGEXP     '/'
   68: #define TYPE_ARRAY      '['
   69: #define TYPE_HASH       '{'
   70: #define TYPE_HASH_DEF   '}'
   71: #define TYPE_STRUCT     'S'
   72: #define TYPE_MODULE_OLD 'M'
   73: #define TYPE_CLASS      'c'
   74: #define TYPE_MODULE     'm'
   75: 
   76: #define TYPE_SYMBOL     ':'   /* w_symbol: first occurrence of a symbol; its name bytes follow */
   77: #define TYPE_SYMLINK    ';'   /* w_symbol: symbol seen before; its index in the symbols table follows */
   78: 
   79: #define TYPE_IVAR       'I'   /* w_object: prefix written when has_ivars() is true for the object */
   80: #define TYPE_LINK       '@'   /* w_object: object already in arg->data; its stored number follows */
   81: 
   82: static ID s_dump, s_load, s_mdump, s_mload;
   83: static ID s_dump_data, s_load_data, s_alloc;
   84: static ID s_getc, s_read, s_write, s_binmode;
   85: 
   86: ID rb_id_encoding(void);
   87: 
   88: typedef struct {   /* one entry of compat_allocator_tbl, filled in by rb_marshal_define_compat */
   89:     VALUE newclass;   /* marked by mark_marshal_compat_i */
   90:     VALUE oldclass;   /* marked by mark_marshal_compat_i */
   91:     VALUE (*dumper)(VALUE);   /* caller-supplied callback; its use is outside this part of the file */
   92:     VALUE (*loader)(VALUE, VALUE);   /* caller-supplied callback; its use is outside this part of the file */
   93: } marshal_compat_t;
   94: 
   95: static st_table *compat_allocator_tbl;
   96: static VALUE compat_allocator_tbl_wrapper;
   97: 
   98: static int
   99: mark_marshal_compat_i(st_data_t key, st_data_t value)
  100: {
  101:     marshal_compat_t *p = (marshal_compat_t *)value;
  102:     rb_gc_mark(p->newclass);
  103:     rb_gc_mark(p->oldclass);
  104:     return ST_CONTINUE;
  105: }
  106: 
  107: static void
  108: mark_marshal_compat_t(void *tbl)
  109: {
  110:     if (!tbl) return;
  111:     st_foreach(tbl, mark_marshal_compat_i, 0);
  112: }
  113: 
  114: void
  115: rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE))
  116: {
  117:     marshal_compat_t *compat;
  118:     rb_alloc_func_t allocator = rb_get_alloc_func(newclass);
  119: 
  120:     if (!allocator) {
  121:         rb_raise(rb_eTypeError, "no allocator");
  122:     }
  123: 
  124:     compat = ALLOC(marshal_compat_t);
  125:     compat->newclass = Qnil;
  126:     compat->oldclass = Qnil;
  127:     compat->newclass = newclass;
  128:     compat->oldclass = oldclass;
  129:     compat->dumper = dumper;
  130:     compat->loader = loader;
  131: 
  132:     st_insert(compat_allocator_tbl, (st_data_t)allocator, (st_data_t)compat);
  133: }
  134: 
  135: struct dump_arg {   /* state shared by the w_* writer functions */
  136:     VALUE obj;
  137:     VALUE str, dest;   /* str: output buffer appended to by w_nbyte; dest: if set, w_nbyte writes str to it with rb_io_write once it reaches BUFSIZ */
  138:     st_table *symbols;   /* ID -> index of symbols already written (w_symbol) */
  139:     st_table *data;   /* objects already written; looked up by w_object to emit TYPE_LINK */
  140:     int taint;   /* if set, w_nbyte taints the buffer before writing it to dest */
  141:     st_table *compat_tbl;   /* consulted by w_class to replace obj with the object stored for it */
  142:     VALUE wrapper;
  143:     st_table *encodings;   /* encoding name -> name string; created on first use by w_encoding */
  144: };
  145: 
  146: struct dump_call_arg {   /* argument bundle handed to the table-iteration callbacks */
  147:     VALUE obj;
  148:     struct dump_arg *arg;   /* the shared dump state */
  149:     int limit;   /* depth limit forwarded to nested w_object calls */
  150: };
  151: 
  152: static void
  153: mark_dump_arg(void *ptr)
  154: {
  155:     struct dump_arg *p = ptr;
  156:     if (!ptr)
  157:         return;
  158:     rb_mark_set(p->data);
  159:     rb_mark_hash(p->compat_tbl);
  160: }
  161: 
  162: static VALUE
  163: class2path(VALUE klass)
  164: {
  165:     VALUE path = rb_class_path(klass);
  166:     char *n = RSTRING_PTR(path);
  167: 
  168:     if (n[0] == '#') {
  169:         rb_raise(rb_eTypeError, "can't dump anonymous %s %s",
  170:                  (TYPE(klass) == T_CLASS ? "class" : "module"),
  171:                  n);
  172:     }
  173:     if (rb_path2class(n) != rb_class_real(klass)) {
  174:         rb_raise(rb_eTypeError, "%s can't be referred", n);
  175:     }
  176:     return path;
  177: }
  178: 
  179: static void w_long(long, struct dump_arg*);
  180: 
  181: static void
  182: w_nbyte(const char *s, int n, struct dump_arg *arg)
  183: {
  184:     VALUE buf = arg->str;
  185:     rb_str_buf_cat(buf, s, n);
  186:     if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) {
  187:         if (arg->taint) OBJ_TAINT(buf);
  188:         rb_io_write(arg->dest, buf);
  189:         rb_str_resize(buf, 0);
  190:     }
  191: }
  192: 
  193: static void
  194: w_byte(char c, struct dump_arg *arg)
  195: {
  196:     w_nbyte(&c, 1, arg);
  197: }
  198: 
  199: static void
  200: w_bytes(const char *s, int n, struct dump_arg *arg)
  201: {
  202:     w_long(n, arg);
  203:     w_nbyte(s, n, arg);
  204: }
  205: 
  206: static void
  207: w_short(int x, struct dump_arg *arg)
  208: {
  209:     w_byte((char)((x >> 0) & 0xff), arg);
  210:     w_byte((char)((x >> 8) & 0xff), arg);
  211: }
  212: 
  213: static void
  214: w_long(long x, struct dump_arg *arg)
  215: {
  216:     char buf[sizeof(long)+1];
  217:     int i, len = 0;
  218: 
  219: #if SIZEOF_LONG > 4
  220:     if (!(RSHIFT(x, 31) == 0 || RSHIFT(x, 31) == -1)) {
  221:         /* big long does not fit in 4 bytes */
  222:         rb_raise(rb_eTypeError, "long too big to dump");
  223:     }
  224: #endif
  225: 
  226:     if (x == 0) {
  227:         w_byte(0, arg);
  228:         return;
  229:     }
  230:     if (0 < x && x < 123) {
  231:         w_byte((char)(x + 5), arg);
  232:         return;
  233:     }
  234:     if (-124 < x && x < 0) {
  235:         w_byte((char)((x - 5)&0xff), arg);
  236:         return;
  237:     }
  238:     for (i=1;i<sizeof(long)+1;i++) {
  239:         buf[i] = x & 0xff;
  240:         x = RSHIFT(x,8);
  241:         if (x == 0) {
  242:             buf[0] = i;
  243:             break;
  244:         }
  245:         if (x == -1) {
  246:             buf[0] = -i;
  247:             break;
  248:         }
  249:     }
  250:     len = i;
  251:     for (i=0;i<=len;i++) {
  252:         w_byte(buf[i], arg);
  253:     }
  254: }
  255: 
  256: #ifdef DBL_MANT_DIG
  257: #define DECIMAL_MANT (53-16)    /* from IEEE754 double precision */
  258: 
  259: #if DBL_MANT_DIG > 32
  260: #define MANT_BITS 32
  261: #elif DBL_MANT_DIG > 24
  262: #define MANT_BITS 24
  263: #elif DBL_MANT_DIG > 16
  264: #define MANT_BITS 16
  265: #else
  266: #define MANT_BITS 8
  267: #endif
  268: 
  269: static int
  270: save_mantissa(double d, char *buf)
  271: {
  272:     int e, i = 0;
  273:     unsigned long m;
  274:     double n;
  275: 
  276:     d = modf(ldexp(frexp(fabs(d), &e), DECIMAL_MANT), &d);
  277:     if (d > 0) {
  278:         buf[i++] = 0;
  279:         do {
  280:             d = modf(ldexp(d, MANT_BITS), &n);
  281:             m = (unsigned long)n;
  282: #if MANT_BITS > 24
  283:             buf[i++] = m >> 24;
  284: #endif
  285: #if MANT_BITS > 16
  286:             buf[i++] = m >> 16;
  287: #endif
  288: #if MANT_BITS > 8
  289:             buf[i++] = m >> 8;
  290: #endif
  291:             buf[i++] = m;
  292:         } while (d > 0);
  293:         while (!buf[i - 1]) --i;
  294:     }
  295:     return i;
  296: }
  297: 
  298: static double
  299: load_mantissa(double d, const char *buf, int len)
  300: {
  301:     if (--len > 0 && !*buf++) { /* binary mantissa mark */
  302:         int e, s = d < 0, dig = 0;
  303:         unsigned long m;
  304: 
  305:         modf(ldexp(frexp(fabs(d), &e), DECIMAL_MANT), &d);
  306:         do {
  307:             m = 0;
  308:             switch (len) {
  309:               default: m = *buf++ & 0xff;
  310: #if MANT_BITS > 24
  311:               case 3: m = (m << 8) | (*buf++ & 0xff);
  312: #endif
  313: #if MANT_BITS > 16
  314:               case 2: m = (m << 8) | (*buf++ & 0xff);
  315: #endif
  316: #if MANT_BITS > 8
  317:               case 1: m = (m << 8) | (*buf++ & 0xff);
  318: #endif
  319:             }
  320:             dig -= len < MANT_BITS / 8 ? 8 * (unsigned)len : MANT_BITS;
  321:             d += ldexp((double)m, dig);
  322:         } while ((len -= MANT_BITS / 8) > 0);
  323:         d = ldexp(d, e - DECIMAL_MANT);
  324:         if (s) d = -d;
  325:     }
  326:     return d;
  327: }
  328: #else
  329: #define load_mantissa(d, buf, len) (d)
  330: #define save_mantissa(d, buf) 0
  331: #endif
  332: 
  333: #ifdef DBL_DIG
  334: #define FLOAT_DIG (DBL_DIG+2)
  335: #else
  336: #define FLOAT_DIG 17
  337: #endif
  338: 
  339: static void
  340: w_float(double d, struct dump_arg *arg)
  341: {
  342:     char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10];
  343: 
  344:     if (isinf(d)) {
  345:         if (d < 0) strcpy(buf, "-inf");
  346:         else       strcpy(buf, "inf");
  347:     }
  348:     else if (isnan(d)) {
  349:         strcpy(buf, "nan");
  350:     }
  351:     else if (d == 0.0) {
  352:         if (1.0/d < 0) strcpy(buf, "-0");
  353:         else           strcpy(buf, "0");
  354:     }
  355:     else {
  356:         int len;
  357: 
  358:         /* xxx: should not use system's sprintf(3) */
  359:         snprintf(buf, sizeof(buf), "%.*g", FLOAT_DIG, d);
  360:         len = strlen(buf);
  361:         w_bytes(buf, len + save_mantissa(d, buf + len), arg);
  362:         return;
  363:     }
  364:     w_bytes(buf, strlen(buf), arg);
  365: }
  366: 
  367: static void
  368: w_symbol(ID id, struct dump_arg *arg)
  369: {
  370:     const char *sym;
  371:     st_data_t num;
  372: 
  373:     if (st_lookup(arg->symbols, id, &num)) {
  374:         w_byte(TYPE_SYMLINK, arg);
  375:         w_long((long)num, arg);
  376:     }
  377:     else {
  378:         sym = rb_id2name(id);
  379:         if (!sym) {
  380:             rb_raise(rb_eTypeError, "can't dump anonymous ID %ld", id);
  381:         }
  382:         w_byte(TYPE_SYMBOL, arg);
  383:         w_bytes(sym, strlen(sym), arg);
  384:         st_add_direct(arg->symbols, id, arg->symbols->num_entries);
  385:     }
  386: }
  387: 
  388: static void
  389: w_unique(const char *s, struct dump_arg *arg)
  390: {
  391:     if (s[0] == '#') {
  392:         rb_raise(rb_eTypeError, "can't dump anonymous class %s", s);
  393:     }
  394:     w_symbol(rb_intern(s), arg);
  395: }
  396: 
  397: static void w_object(VALUE,struct dump_arg*,int);
  398: 
  399: static int
  400: hash_each(VALUE key, VALUE value, struct dump_call_arg *arg)
  401: {
  402:     w_object(key, arg->arg, arg->limit);
  403:     w_object(value, arg->arg, arg->limit);
  404:     return ST_CONTINUE;
  405: }
  406: 
  407: static void
  408: w_extended(VALUE klass, struct dump_arg *arg, int check)
  409: {
  410:     char *path;
  411: 
  412:     if (check && FL_TEST(klass, FL_SINGLETON)) {
  413:         if (RCLASS_M_TBL(klass)->num_entries ||
  414:             (RCLASS_IV_TBL(klass) && RCLASS_IV_TBL(klass)->num_entries > 1)) {
  415:             rb_raise(rb_eTypeError, "singleton can't be dumped");
  416:         }
  417:         klass = RCLASS_SUPER(klass);
  418:     }
  419:     while (BUILTIN_TYPE(klass) == T_ICLASS) {
  420:         path = rb_class2name(RBASIC(klass)->klass);
  421:         w_byte(TYPE_EXTENDED, arg);
  422:         w_unique(path, arg);
  423:         klass = RCLASS_SUPER(klass);
  424:     }
  425: }
  426: 
  427: static void
  428: w_class(char type, VALUE obj, struct dump_arg *arg, int check)
  429: {
  430:     volatile VALUE p;
  431:     char *path;
  432:     st_data_t real_obj;
  433:     VALUE klass;
  434: 
  435:     if (st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) {
  436:         obj = (VALUE)real_obj;
  437:     }
  438:     klass = CLASS_OF(obj);
  439:     w_extended(klass, arg, check);
  440:     w_byte(type, arg);
  441:     p = class2path(rb_class_real(klass));
  442:     path = RSTRING_PTR(p);
  443:     w_unique(path, arg);
  444: }
  445: 
  446: static void
  447: w_uclass(VALUE obj, VALUE super, struct dump_arg *arg)
  448: {
  449:     VALUE klass = CLASS_OF(obj);
  450: 
  451:     w_extended(klass, arg, Qtrue);
  452:     klass = rb_class_real(klass);
  453:     if (klass != super) {
  454:         w_byte(TYPE_UCLASS, arg);
  455:         w_unique(RSTRING_PTR(class2path(klass)), arg);
  456:     }
  457: }
  458: 
  459: static int
  460: w_obj_each(ID id, VALUE value, struct dump_call_arg *arg)
  461: {
  462:     if (id == rb_id_encoding()) return ST_CONTINUE;
  463:     w_symbol(id, arg->arg);
  464:     w_object(value, arg->arg, arg->limit);
  465:     return ST_CONTINUE;
  466: }
  467: 
  468: static void
  469: w_encoding(VALUE obj, long num, struct dump_call_arg *arg)
  470: {
  471:     int encidx = rb_enc_get_index(obj);
  472:     rb_encoding *enc = 0;
  473:     st_data_t name;
  474: 
  475:     if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
  476:         w_long(num, arg->arg);
  477:         return;
  478:     }
  479:     w_long(num + 1, arg->arg);
  480:     w_symbol(rb_id_encoding(), arg->arg);
  481:     do {
  482:         if (!arg->arg->encodings)
  483:             arg->arg->encodings = st_init_strcasetable();
  484:         else if (st_lookup(arg->arg->encodings, (st_data_t)rb_enc_name(enc), &name))
  485:             break;
  486:         name = (st_data_t)rb_str_new2(rb_enc_name(enc));
  487:         st_insert(arg->arg->encodings, (st_data_t)rb_enc_name(enc), name);
  488:     } while (0);
  489:     w_object(name, arg->arg, arg->limit);
  490: }
  491: 
  492: static void
  493: w_ivar(VALUE obj, st_table *tbl, struct dump_call_arg *arg)
  494: {
  495:     long num = tbl ? tbl->num_entries : 0;
  496: 
  497:     w_encoding(obj, num, arg);
  498:     if (tbl) {
  499:         st_foreach_safe(tbl, w_obj_each, (st_data_t)arg);
  500:     }
  501: }
  502: 
  503: static void
  504: w_objivar(VALUE obj, struct dump_call_arg *arg)
  505: {
  506:     VALUE *ptr;
  507:     long i, len, num;
  508: 
  509:     len = ROBJECT_LEN(obj);
  510:     ptr = ROBJECT_PTR(obj);
  511:     num = 0;
  512:     for (i = 0; i < len; i++)
  513:         if (ptr[i] != Qundef)
  514:             num += 1;
  515: 
  516:     w_encoding(obj, num, arg);
  517:     if (num != 0) {
  518:         rb_ivar_foreach(obj, w_obj_each, (st_data_t)arg);
  519:     }
  520: }
  521: 
  522: static void
  523: w_object(VALUE obj, struct dump_arg *arg, int limit)
  524: {
  525:     struct dump_call_arg c_arg;
  526:     st_table *ivtbl = 0;
  527:     st_data_t num;
  528:     int hasiv = 0;
  529: #define has_ivars(obj, ivtbl) ((ivtbl = rb_generic_ivar_table(obj)) != 0 || \
  530:                                (!SPECIAL_CONST_P(obj) && ENCODING_GET(obj)))
  531: 
  532:     if (limit == 0) {
  533:         rb_raise(rb_eArgError, "exceed depth limit");
  534:     }
  535: 
  536:     limit--;
  537:     c_arg.limit = limit;
  538:     c_arg.arg = arg;
  539: 
  540:     if (st_lookup(arg->data, obj, &num)) {
  541:         w_byte(TYPE_LINK, arg);
  542:         w_long((long)num, arg);
  543:         return;
  544:     }
  545: 
  546:     if ((hasiv = has_ivars(obj, ivtbl)) != 0) {
  547:         w_byte(TYPE_IVAR, arg);
  548:     }
  549:     if (obj == Qnil) {
  550:         w_byte(TYPE_NIL, arg);
  551:     }
  552:     else if (obj == Qtrue) {
  553:         w_byte(TYPE_TRUE, arg);
  554:     }
  555:     else if (obj == Qfalse) {
  556:         w_byte(TYPE_FALSE, arg);
  557:     }
  558:     else if (FIXNUM_P(obj)) {
  559: #if SIZEOF_LONG <= 4
  560:         w_byte(TYPE_FIXNUM, arg);
  561:         w_long(FIX2INT(obj), arg);
  562: #else
  563:         if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) {
  564:             w_byte(TYPE_FIXNUM, arg);
  565:             w_long(FIX2LONG(obj), arg);<