1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13: #include "ruby/ruby.h"
14: #include "ruby/io.h"
15: #include "ruby/st.h"
16: #include "ruby/util.h"
17: #include "ruby/encoding.h"
18:
19: #include <math.h>
20: #ifdef HAVE_FLOAT_H
21: #include <float.h>
22: #endif
23: #ifdef HAVE_IEEEFP_H
24: #include <ieeefp.h>
25: #endif
26:
/* Width in bits of one "short" unit, taken here as two chars. */
#define BITSPERSHORT (2 * CHAR_BIT)
/* Mask selecting the low BITSPERSHORT bits. */
#define SHORTMASK    ((1 << BITSPERSHORT) - 1)
/* Shift x right by one "short" unit. */
#define SHORTDN(x)   RSHIFT(x, BITSPERSHORT)
30:
/*
 * SHORTLEN(x): length expressed in "short" units.
 * When a BDIGIT is exactly one short wide the digit count is used as is;
 * otherwise shortlen() computes it from the digit array.
 */
#if SIZEOF_SHORT == SIZEOF_BDIGITS
#define SHORTLEN(x) (x)
#else
/*
 * Returns the number of short-sized units covered by the `len` digits in
 * `ds`: every digit below the top one counts sizeof(BDIGIT)/2 units, and
 * the top digit counts only as many units as it takes to shift it to zero.
 */
static int
shortlen(long len, BDIGIT *ds)
{
    BDIGIT top = ds[len - 1];
    int used = 0;

    for (; top; top = SHORTDN(top)) {
        used++;
    }
    return (len - 1)*sizeof(BDIGIT)/2 + used;
}
/* NOTE: expands to a reference to a variable named `d` at the use site. */
#define SHORTLEN(x) shortlen((x),d)
#endif
49:
/* Marshal format version numbers (major.minor). */
#define MARSHAL_MAJOR   4
#define MARSHAL_MINOR   8
52:
/*
 * One-character type tags.  The writers in this file emit them with
 * w_byte() ahead of the corresponding payload.
 */

/* immediate values */
#define TYPE_NIL        '0'
#define TYPE_TRUE       'T'
#define TYPE_FALSE      'F'
#define TYPE_FIXNUM     'i'

/* objects */
#define TYPE_EXTENDED   'e'
#define TYPE_UCLASS     'C'
#define TYPE_OBJECT     'o'
#define TYPE_DATA       'd'
#define TYPE_USERDEF    'u'
#define TYPE_USRMARSHAL 'U'
#define TYPE_FLOAT      'f'
#define TYPE_BIGNUM     'l'
#define TYPE_STRING     '"'
#define TYPE_REGEXP     '/'
#define TYPE_ARRAY      '['
#define TYPE_HASH       '{'
#define TYPE_HASH_DEF   '}'
#define TYPE_STRUCT     'S'
#define TYPE_MODULE_OLD 'M'
#define TYPE_CLASS      'c'
#define TYPE_MODULE     'm'

/* symbols: a full symbol, or a back-reference to one already written */
#define TYPE_SYMBOL     ':'
#define TYPE_SYMLINK    ';'

/* prefix for a value followed by instance variables / back-reference to an
 * object already written */
#define TYPE_IVAR       'I'
#define TYPE_LINK       '@'
81:
82: static ID s_dump, s_load, s_mdump, s_mload;
83: static ID s_dump_data, s_load_data, s_alloc;
84: static ID s_getc, s_read, s_write, s_binmode;
85:
86: ID rb_id_encoding(void);
87:
88: typedef struct {
89: VALUE newclass;
90: VALUE oldclass;
91: VALUE (*dumper)(VALUE);
92: VALUE (*loader)(VALUE, VALUE);
93: } marshal_compat_t;
94:
95: static st_table *compat_allocator_tbl;
96: static VALUE compat_allocator_tbl_wrapper;
97:
98: static int
99: mark_marshal_compat_i(st_data_t key, st_data_t value)
100: {
101: marshal_compat_t *p = (marshal_compat_t *)value;
102: rb_gc_mark(p->newclass);
103: rb_gc_mark(p->oldclass);
104: return ST_CONTINUE;
105: }
106:
107: static void
108: mark_marshal_compat_t(void *tbl)
109: {
110: if (!tbl) return;
111: st_foreach(tbl, mark_marshal_compat_i, 0);
112: }
113:
114: void
115: rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE))
116: {
117: marshal_compat_t *compat;
118: rb_alloc_func_t allocator = rb_get_alloc_func(newclass);
119:
120: if (!allocator) {
121: rb_raise(rb_eTypeError, "no allocator");
122: }
123:
124: compat = ALLOC(marshal_compat_t);
125: compat->newclass = Qnil;
126: compat->oldclass = Qnil;
127: compat->newclass = newclass;
128: compat->oldclass = oldclass;
129: compat->dumper = dumper;
130: compat->loader = loader;
131:
132: st_insert(compat_allocator_tbl, (st_data_t)allocator, (st_data_t)compat);
133: }
134:
135: struct dump_arg {
136: VALUE obj;
137: VALUE str, dest;
138: st_table *symbols;
139: st_table *data;
140: int taint;
141: st_table *compat_tbl;
142: VALUE wrapper;
143: st_table *encodings;
144: };
145:
146: struct dump_call_arg {
147: VALUE obj;
148: struct dump_arg *arg;
149: int limit;
150: };
151:
152: static void
153: mark_dump_arg(void *ptr)
154: {
155: struct dump_arg *p = ptr;
156: if (!ptr)
157: return;
158: rb_mark_set(p->data);
159: rb_mark_hash(p->compat_tbl);
160: }
161:
162: static VALUE
163: class2path(VALUE klass)
164: {
165: VALUE path = rb_class_path(klass);
166: char *n = RSTRING_PTR(path);
167:
168: if (n[0] == '#') {
169: rb_raise(rb_eTypeError, "can't dump anonymous %s %s",
170: (TYPE(klass) == T_CLASS ? "class" : "module"),
171: n);
172: }
173: if (rb_path2class(n) != rb_class_real(klass)) {
174: rb_raise(rb_eTypeError, "%s can't be referred", n);
175: }
176: return path;
177: }
178:
179: static void w_long(long, struct dump_arg*);
180:
181: static void
182: w_nbyte(const char *s, int n, struct dump_arg *arg)
183: {
184: VALUE buf = arg->str;
185: rb_str_buf_cat(buf, s, n);
186: if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) {
187: if (arg->taint) OBJ_TAINT(buf);
188: rb_io_write(arg->dest, buf);
189: rb_str_resize(buf, 0);
190: }
191: }
192:
/* Writes the single byte `c` to the dump buffer. */
static void
w_byte(char c, struct dump_arg *arg)
{
    const char *one = &c;

    w_nbyte(one, 1, arg);
}
198:
/* Writes a length-prefixed byte string: `n` via w_long(), then the bytes. */
static void
w_bytes(const char *s, int n, struct dump_arg *arg)
{
    long count = n;

    w_long(count, arg);
    w_nbyte(s, n, arg);
}
205:
/* Writes the low 16 bits of `x` as two bytes, least significant first. */
static void
w_short(int x, struct dump_arg *arg)
{
    char low = (char)(x & 0xff);
    char high = (char)((x >> 8) & 0xff);

    w_byte(low, arg);
    w_byte(high, arg);
}
212:
/*
 * Writes `x` in the variable-length integer encoding:
 *   - 0 is the single byte 0;
 *   - 1..122 is the single byte x + 5;
 *   - -123..-1 is the single byte (x - 5) & 0xff;
 *   - anything else is a count byte followed by that many value bytes,
 *     least significant first.  The count is positive when the remaining
 *     high part is all zero bits and negative when it is all one bits.
 *
 * When long is wider than 4 bytes, values whose bits above bit 31 are not
 * a pure sign extension raise TypeError ("long too big to dump").
 */
static void
w_long(long x, struct dump_arg *arg)
{
    char buf[sizeof(long)+1];
    int nbytes, pos;

#if SIZEOF_LONG > 4
    {
        long high = RSHIFT(x, 31);

        if (high != 0 && high != -1) {
            rb_raise(rb_eTypeError, "long too big to dump");
        }
    }
#endif

    if (x == 0) {
        w_byte(0, arg);
        return;
    }
    if (x > 0 && x < 123) {
        w_byte((char)(x + 5), arg);
        return;
    }
    if (x < 0 && x > -124) {
        w_byte((char)((x - 5)&0xff), arg);
        return;
    }

    /* Peel off one byte at a time until only sign bits remain. */
    for (nbytes = 1; nbytes < (int)(sizeof(long)+1); nbytes++) {
        buf[nbytes] = x & 0xff;
        x = RSHIFT(x,8);
        if (x == 0 || x == -1) {
            buf[0] = (x == 0) ? nbytes : -nbytes;
            break;
        }
    }

    /* buf[0] is the count byte; buf[1..nbytes] are the value bytes. */
    for (pos = 0; pos <= nbytes; pos++) {
        w_byte(buf[pos], arg);
    }
}
255:
256: #ifdef DBL_MANT_DIG
/*
 * Number of leading mantissa bits moved into the integer part before the
 * remaining fraction is stored by save_mantissa() / restored by
 * load_mantissa().
 */
#define DECIMAL_MANT (53-16)

/*
 * Bits of fraction handled per chunk: a multiple of 8 chosen from
 * DBL_MANT_DIG, capped at 32.
 */
#if DBL_MANT_DIG > 32
# define MANT_BITS 32
#elif DBL_MANT_DIG > 24
# define MANT_BITS 24
#elif DBL_MANT_DIG > 16
# define MANT_BITS 16
#else
# define MANT_BITS 8
#endif
268:
269: static int
270: save_mantissa(double d, char *buf)
271: {
272: int e, i = 0;
273: unsigned long m;
274: double n;
275:
276: d = modf(ldexp(frexp(fabs(d), &e), DECIMAL_MANT), &d);
277: if (d > 0) {
278: buf[i++] = 0;
279: do {
280: d = modf(ldexp(d, MANT_BITS), &n);
281: m = (unsigned long)n;
282: #if MANT_BITS > 24
283: buf[i++] = m >> 24;
284: #endif
285: #if MANT_BITS > 16
286: buf[i++] = m >> 16;
287: #endif
288: #if MANT_BITS > 8
289: buf[i++] = m >> 8;
290: #endif
291: buf[i++] = m;
292: } while (d > 0);
293: while (!buf[i - 1]) --i;
294: }
295: return i;
296: }
297:
298: static double
299: load_mantissa(double d, const char *buf, int len)
300: {
301: if (--len > 0 && !*buf++) {
302: int e, s = d < 0, dig = 0;
303: unsigned long m;
304:
305: modf(ldexp(frexp(fabs(d), &e), DECIMAL_MANT), &d);
306: do {
307: m = 0;
308: switch (len) {
309: default: m = *buf++ & 0xff;
310: #if MANT_BITS > 24
311: case 3: m = (m << 8) | (*buf++ & 0xff);
312: #endif
313: #if MANT_BITS > 16
314: case 2: m = (m << 8) | (*buf++ & 0xff);
315: #endif
316: #if MANT_BITS > 8
317: case 1: m = (m << 8) | (*buf++ & 0xff);
318: #endif
319: }
320: dig -= len < MANT_BITS / 8 ? 8 * (unsigned)len : MANT_BITS;
321: d += ldexp((double)m, dig);
322: } while ((len -= MANT_BITS / 8) > 0);
323: d = ldexp(d, e - DECIMAL_MANT);
324: if (s) d = -d;
325: }
326: return d;
327: }
328: #else
/* Without DBL_MANT_DIG no extra mantissa bytes are saved or restored. */
#define load_mantissa(d, buf, len)  (d)
#define save_mantissa(d, buf)       0
331: #endif
332:
/* Precision passed to "%.*g" when w_float() formats a double. */
#ifdef DBL_DIG
# define FLOAT_DIG (DBL_DIG+2)
#else
# define FLOAT_DIG 17
#endif
338:
339: static void
340: w_float(double d, struct dump_arg *arg)
341: {
342: char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10];
343:
344: if (isinf(d)) {
345: if (d < 0) strcpy(buf, "-inf");
346: else strcpy(buf, "inf");
347: }
348: else if (isnan(d)) {
349: strcpy(buf, "nan");
350: }
351: else if (d == 0.0) {
352: if (1.0/d < 0) strcpy(buf, "-0");
353: else strcpy(buf, "0");
354: }
355: else {
356: int len;
357:
358:
359: snprintf(buf, sizeof(buf), "%.*g", FLOAT_DIG, d);
360: len = strlen(buf);
361: w_bytes(buf, len + save_mantissa(d, buf + len), arg);
362: return;
363: }
364: w_bytes(buf, strlen(buf), arg);
365: }
366:
367: static void
368: w_symbol(ID id, struct dump_arg *arg)
369: {
370: const char *sym;
371: st_data_t num;
372:
373: if (st_lookup(arg->symbols, id, &num)) {
374: w_byte(TYPE_SYMLINK, arg);
375: w_long((long)num, arg);
376: }
377: else {
378: sym = rb_id2name(id);
379: if (!sym) {
380: rb_raise(rb_eTypeError, "can't dump anonymous ID %ld", id);
381: }
382: w_byte(TYPE_SYMBOL, arg);
383: w_bytes(sym, strlen(sym), arg);
384: st_add_direct(arg->symbols, id, arg->symbols->num_entries);
385: }
386: }
387:
388: static void
389: w_unique(const char *s, struct dump_arg *arg)
390: {
391: if (s[0] == '#') {
392: rb_raise(rb_eTypeError, "can't dump anonymous class %s", s);
393: }
394: w_symbol(rb_intern(s), arg);
395: }
396:
397: static void w_object(VALUE,struct dump_arg*,int);
398:
399: static int
400: hash_each(VALUE key, VALUE value, struct dump_call_arg *arg)
401: {
402: w_object(key, arg->arg, arg->limit);
403: w_object(value, arg->arg, arg->limit);
404: return ST_CONTINUE;
405: }
406:
407: static void
408: w_extended(VALUE klass, struct dump_arg *arg, int check)
409: {
410: char *path;
411:
412: if (check && FL_TEST(klass, FL_SINGLETON)) {
413: if (RCLASS_M_TBL(klass)->num_entries ||
414: (RCLASS_IV_TBL(klass) && RCLASS_IV_TBL(klass)->num_entries > 1)) {
415: rb_raise(rb_eTypeError, "singleton can't be dumped");
416: }
417: klass = RCLASS_SUPER(klass);
418: }
419: while (BUILTIN_TYPE(klass) == T_ICLASS) {
420: path = rb_class2name(RBASIC(klass)->klass);
421: w_byte(TYPE_EXTENDED, arg);
422: w_unique(path, arg);
423: klass = RCLASS_SUPER(klass);
424: }
425: }
426:
427: static void
428: w_class(char type, VALUE obj, struct dump_arg *arg, int check)
429: {
430: volatile VALUE p;
431: char *path;
432: st_data_t real_obj;
433: VALUE klass;
434:
435: if (st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) {
436: obj = (VALUE)real_obj;
437: }
438: klass = CLASS_OF(obj);
439: w_extended(klass, arg, check);
440: w_byte(type, arg);
441: p = class2path(rb_class_real(klass));
442: path = RSTRING_PTR(p);
443: w_unique(path, arg);
444: }
445:
446: static void
447: w_uclass(VALUE obj, VALUE super, struct dump_arg *arg)
448: {
449: VALUE klass = CLASS_OF(obj);
450:
451: w_extended(klass, arg, Qtrue);
452: klass = rb_class_real(klass);
453: if (klass != super) {
454: w_byte(TYPE_UCLASS, arg);
455: w_unique(RSTRING_PTR(class2path(klass)), arg);
456: }
457: }
458:
459: static int
460: w_obj_each(ID id, VALUE value, struct dump_call_arg *arg)
461: {
462: if (id == rb_id_encoding()) return ST_CONTINUE;
463: w_symbol(id, arg->arg);
464: w_object(value, arg->arg, arg->limit);
465: return ST_CONTINUE;
466: }
467:
468: static void
469: w_encoding(VALUE obj, long num, struct dump_call_arg *arg)
470: {
471: int encidx = rb_enc_get_index(obj);
472: rb_encoding *enc = 0;
473: st_data_t name;
474:
475: if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
476: w_long(num, arg->arg);
477: return;
478: }
479: w_long(num + 1, arg->arg);
480: w_symbol(rb_id_encoding(), arg->arg);
481: do {
482: if (!arg->arg->encodings)
483: arg->arg->encodings = st_init_strcasetable();
484: else if (st_lookup(arg->arg->encodings, (st_data_t)rb_enc_name(enc), &name))
485: break;
486: name = (st_data_t)rb_str_new2(rb_enc_name(enc));
487: st_insert(arg->arg->encodings, (st_data_t)rb_enc_name(enc), name);
488: } while (0);
489: w_object(name, arg->arg, arg->limit);
490: }
491:
492: static void
493: w_ivar(VALUE obj, st_table *tbl, struct dump_call_arg *arg)
494: {
495: long num = tbl ? tbl->num_entries : 0;
496:
497: w_encoding(obj, num, arg);
498: if (tbl) {
499: st_foreach_safe(tbl, w_obj_each, (st_data_t)arg);
500: }
501: }
502:
503: static void
504: w_objivar(VALUE obj, struct dump_call_arg *arg)
505: {
506: VALUE *ptr;
507: long i, len, num;
508:
509: len = ROBJECT_LEN(obj);
510: ptr = ROBJECT_PTR(obj);
511: num = 0;
512: for (i = 0; i < len; i++)
513: if (ptr[i] != Qundef)
514: num += 1;
515:
516: w_encoding(obj, num, arg);
517: if (num != 0) {
518: rb_ivar_foreach(obj, w_obj_each, (st_data_t)arg);
519: }
520: }
521:
522: static void
523: w_object(VALUE obj, struct dump_arg *arg, int limit)
524: {
525: struct dump_call_arg c_arg;
526: st_table *ivtbl = 0;
527: st_data_t num;
528: int hasiv = 0;
529: #define has_ivars(obj, ivtbl) ((ivtbl = rb_generic_ivar_table(obj)) != 0 || \
530: (!SPECIAL_CONST_P(obj) && ENCODING_GET(obj)))
531:
532: if (limit == 0) {
533: rb_raise(rb_eArgError, "exceed depth limit");
534: }
535:
536: limit--;
537: c_arg.limit = limit;
538: c_arg.arg = arg;
539:
540: if (st_lookup(arg->data, obj, &num)) {
541: w_byte(TYPE_LINK, arg);
542: w_long((long)num, arg);
543: return;
544: }
545:
546: if ((hasiv = has_ivars(obj, ivtbl)) != 0) {
547: w_byte(TYPE_IVAR, arg);
548: }
549: if (obj == Qnil) {
550: w_byte(TYPE_NIL, arg);
551: }
552: else if (obj == Qtrue) {
553: w_byte(TYPE_TRUE, arg);
554: }
555: else if (obj == Qfalse) {
556: w_byte(TYPE_FALSE, arg);
557: }
558: else if (FIXNUM_P(obj)) {
559: #if SIZEOF_LONG <= 4
560: w_byte(TYPE_FIXNUM, arg);
561: w_long(FIX2INT(obj), arg);
562: #else
563: if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) {
564: w_byte(TYPE_FIXNUM, arg);
565: w_long(FIX2LONG(obj), arg);<