diff options
Diffstat (limited to 'marshal.c')
-rw-r--r-- | marshal.c | 1969 |
1 files changed, 1075 insertions, 894 deletions
@@ -28,14 +28,18 @@ #include "internal/encoding.h" #include "internal/error.h" #include "internal/hash.h" +#include "internal/numeric.h" #include "internal/object.h" #include "internal/struct.h" +#include "internal/symbol.h" #include "internal/util.h" #include "internal/vm.h" #include "ruby/io.h" #include "ruby/ruby.h" #include "ruby/st.h" #include "ruby/util.h" +#include "builtin.h" +#include "shape.h" #define BITSPERSHORT (2*CHAR_BIT) #define SHORTMASK ((1<<BITSPERSHORT)-1) @@ -52,8 +56,8 @@ shortlen(size_t len, BDIGIT *ds) num = ds[len-1]; while (num) { - num = SHORTDN(num); - offset++; + num = SHORTDN(num); + offset++; } return (len - 1)*SIZEOF_BDIGIT/2 + offset; } @@ -122,7 +126,7 @@ typedef struct { static st_table *compat_allocator_tbl; static VALUE compat_allocator_tbl_wrapper; static VALUE rb_marshal_dump_limited(VALUE obj, VALUE port, int limit); -static VALUE rb_marshal_load_with_proc(VALUE port, VALUE proc); +static VALUE rb_marshal_load_with_proc(VALUE port, VALUE proc, bool freeze); static int mark_marshal_compat_i(st_data_t key, st_data_t value, st_data_t _) @@ -169,6 +173,7 @@ struct dump_arg { st_table *data; st_table *compat_tbl; st_table *encodings; + st_index_t num_entries; }; struct dump_call_arg { @@ -182,20 +187,20 @@ check_dump_arg(VALUE ret, struct dump_arg *arg, const char *name) { if (!arg->symbols) { rb_raise(rb_eRuntimeError, "Marshal.dump reentered at %s", - name); + name); } return ret; } static VALUE check_userdump_arg(VALUE obj, ID sym, int argc, const VALUE *argv, - struct dump_arg *arg, const char *name) + struct dump_arg *arg, const char *name) { VALUE ret = rb_funcallv(obj, sym, argc, argv); VALUE klass = CLASS_OF(obj); if (CLASS_OF(ret) == klass) { rb_raise(rb_eRuntimeError, "%"PRIsVALUE"#%s returned same class instance", - klass, name); + klass, name); } return check_dump_arg(ret, arg, name); } @@ -223,19 +228,24 @@ static void free_dump_arg(void *ptr) { clear_dump_arg(ptr); - xfree(ptr); } static size_t memsize_dump_arg(const void *ptr) { - return sizeof(struct dump_arg); + const struct dump_arg *p = (struct dump_arg *)ptr; + size_t memsize = 0; + if (p->symbols) memsize += rb_st_memsize(p->symbols); + if (p->data) memsize += rb_st_memsize(p->data); + if (p->compat_tbl) memsize += rb_st_memsize(p->compat_tbl); + if (p->encodings) memsize += rb_st_memsize(p->encodings); + return memsize; } static const rb_data_type_t dump_arg_data = { "dump_arg", {mark_dump_arg, free_dump_arg, memsize_dump_arg,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE }; static VALUE @@ -244,13 +254,13 @@ must_not_be_anonymous(const char *type, VALUE path) char *n = RSTRING_PTR(path); if (!rb_enc_asciicompat(rb_enc_get(path))) { - /* cannot occur? */ - rb_raise(rb_eTypeError, "can't dump non-ascii %s name % "PRIsVALUE, - type, path); + /* cannot occur? */ + rb_raise(rb_eTypeError, "can't dump non-ascii %s name % "PRIsVALUE, + type, path); } if (n[0] == '#') { - rb_raise(rb_eTypeError, "can't dump anonymous %s % "PRIsVALUE, - type, path); + rb_raise(rb_eTypeError, "can't dump anonymous %s % "PRIsVALUE, + type, path); } return path; } @@ -262,7 +272,7 @@ class2path(VALUE klass) must_not_be_anonymous((RB_TYPE_P(klass, T_CLASS) ? "class" : "module"), path); if (rb_path_to_class(path) != rb_class_real(klass)) { - rb_raise(rb_eTypeError, "% "PRIsVALUE" can't be referred to", path); + rb_raise(rb_eTypeError, "% "PRIsVALUE" can't be referred to", path); } return path; } @@ -278,8 +288,8 @@ w_nbyte(const char *s, long n, struct dump_arg *arg) VALUE buf = arg->str; rb_str_buf_cat(buf, s, n); if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) { - rb_io_write(arg->dest, buf); - rb_str_resize(buf, 0); + rb_io_write(arg->dest, buf); + rb_str_resize(buf, 0); } } @@ -323,7 +333,7 @@ ruby_marshal_write_long(long x, char *buf) #if SIZEOF_LONG > 4 if (!(RSHIFT(x, 31) == 0 || RSHIFT(x, 31) == -1)) { - /* big long does not fit in 4 bytes */ + /* big long does not fit in 4 bytes */ return -1; } #endif @@ -341,16 +351,16 @@ ruby_marshal_write_long(long x, char *buf) return 1; } for (i=1;i<(int)sizeof(long)+1;i++) { - buf[i] = (char)(x & 0xff); - x = RSHIFT(x,8); - if (x == 0) { - buf[0] = i; - break; - } - if (x == -1) { - buf[0] = -i; - break; - } + buf[i] = (char)(x & 0xff); + x = RSHIFT(x,8); + if (x == 0) { + buf[0] = i; + break; + } + if (x == -1) { + buf[0] = -i; + break; + } } return i+1; } @@ -373,13 +383,13 @@ load_mantissa(double d, const char *buf, long len) { if (!len) return d; if (--len > 0 && !*buf++) { /* binary mantissa mark */ - int e, s = d < 0, dig = 0; - unsigned long m; + int e, s = d < 0, dig = 0; + unsigned long m; - modf(ldexp(frexp(fabs(d), &e), DECIMAL_MANT), &d); - do { - m = 0; - switch (len) { + modf(ldexp(frexp(fabs(d), &e), DECIMAL_MANT), &d); + do { + m = 0; + switch (len) { default: m = *buf++ & 0xff; /* fall through */ #if MANT_BITS > 24 case 3: m = (m << 8) | (*buf++ & 0xff); /* fall through */ @@ -388,14 +398,14 @@ load_mantissa(double d, const char *buf, long len) case 2: m = (m << 8) | (*buf++ & 0xff); /* fall through */ #endif #if MANT_BITS > 8 - case 1: m = (m << 8) | (*buf++ & 0xff); + case 1: m = (m << 8) | (*buf++ & 0xff); #endif - } - dig -= len < MANT_BITS / 8 ? 8 * (unsigned)len : MANT_BITS; - d += ldexp((double)m, dig); - } while ((len -= MANT_BITS / 8) > 0); - d = ldexp(d, e - DECIMAL_MANT); - if (s) d = -d; + } + dig -= len < MANT_BITS / 8 ? 8 * (unsigned)len : MANT_BITS; + d += ldexp((double)m, dig); + } while ((len -= MANT_BITS / 8) > 0); + d = ldexp(d, e - DECIMAL_MANT); + if (s) d = -d; } return d; } @@ -415,49 +425,49 @@ w_float(double d, struct dump_arg *arg) char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10]; if (isinf(d)) { - if (d < 0) w_cstr("-inf", arg); - else w_cstr("inf", arg); + if (d < 0) w_cstr("-inf", arg); + else w_cstr("inf", arg); } else if (isnan(d)) { - w_cstr("nan", arg); + w_cstr("nan", arg); } else if (d == 0.0) { if (signbit(d)) w_cstr("-0", arg); else w_cstr("0", arg); } else { - int decpt, sign, digs, len = 0; - char *e, *p = ruby_dtoa(d, 0, 0, &decpt, &sign, &e); - if (sign) buf[len++] = '-'; - digs = (int)(e - p); - if (decpt < -3 || decpt > digs) { - buf[len++] = p[0]; - if (--digs > 0) buf[len++] = '.'; - memcpy(buf + len, p + 1, digs); - len += digs; - len += snprintf(buf + len, sizeof(buf) - len, "e%d", decpt - 1); - } - else if (decpt > 0) { - memcpy(buf + len, p, decpt); - len += decpt; - if ((digs -= decpt) > 0) { - buf[len++] = '.'; - memcpy(buf + len, p + decpt, digs); - len += digs; - } - } - else { - buf[len++] = '0'; - buf[len++] = '.'; - if (decpt) { - memset(buf + len, '0', -decpt); - len -= decpt; - } - memcpy(buf + len, p, digs); - len += digs; - } - xfree(p); - w_bytes(buf, len, arg); + int decpt, sign, digs, len = 0; + char *e, *p = ruby_dtoa(d, 0, 0, &decpt, &sign, &e); + if (sign) buf[len++] = '-'; + digs = (int)(e - p); + if (decpt < -3 || decpt > digs) { + buf[len++] = p[0]; + if (--digs > 0) buf[len++] = '.'; + memcpy(buf + len, p + 1, digs); + len += digs; + len += snprintf(buf + len, sizeof(buf) - len, "e%d", decpt - 1); + } + else if (decpt > 0) { + memcpy(buf + len, p, decpt); + len += decpt; + if ((digs -= decpt) > 0) { + buf[len++] = '.'; + memcpy(buf + len, p + decpt, digs); + len += digs; + } + } + else { + buf[len++] = '0'; + buf[len++] = '.'; + if (decpt) { + memset(buf + len, '0', -decpt); + len -= decpt; + } + memcpy(buf + len, p, digs); + len += digs; + } + free(p); + w_bytes(buf, len, arg); } } @@ -468,33 +478,33 @@ w_symbol(VALUE sym, struct dump_arg *arg) VALUE encname; if (st_lookup(arg->symbols, sym, &num)) { - w_byte(TYPE_SYMLINK, arg); - w_long((long)num, arg); + w_byte(TYPE_SYMLINK, arg); + w_long((long)num, arg); } else { - const VALUE orig_sym = sym; - sym = rb_sym2str(sym); - if (!sym) { - rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, sym); - } - encname = encoding_name(sym, arg); - if (NIL_P(encname) || - rb_enc_str_coderange(sym) == ENC_CODERANGE_7BIT) { - encname = Qnil; - } - else { - w_byte(TYPE_IVAR, arg); - } - w_byte(TYPE_SYMBOL, arg); - w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg); - st_add_direct(arg->symbols, orig_sym, arg->symbols->num_entries); - if (!NIL_P(encname)) { - struct dump_call_arg c_arg; - c_arg.limit = 1; - c_arg.arg = arg; - w_long(1L, arg); - w_encoding(encname, &c_arg); - } + const VALUE orig_sym = sym; + sym = rb_sym2str(sym); + if (!sym) { + rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, sym); + } + encname = encoding_name(sym, arg); + if (NIL_P(encname) || + is_ascii_string(sym)) { + encname = Qnil; + } + else { + w_byte(TYPE_IVAR, arg); + } + w_byte(TYPE_SYMBOL, arg); + w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg); + st_add_direct(arg->symbols, orig_sym, arg->symbols->num_entries); + if (!NIL_P(encname)) { + struct dump_call_arg c_arg; + c_arg.limit = 1; + c_arg.arg = arg; + w_long(1L, arg); + w_encoding(encname, &c_arg); + } } } @@ -518,18 +528,18 @@ hash_each(VALUE key, VALUE value, VALUE v) #define SINGLETON_DUMP_UNABLE_P(klass) \ (rb_id_table_size(RCLASS_M_TBL(klass)) > 0 || \ - (RCLASS_IV_TBL(klass) && RCLASS_IV_TBL(klass)->num_entries > 1)) + rb_ivar_count(klass) > 0) static void w_extended(VALUE klass, struct dump_arg *arg, int check) { - if (check && FL_TEST(klass, FL_SINGLETON)) { - VALUE origin = RCLASS_ORIGIN(klass); - if (SINGLETON_DUMP_UNABLE_P(klass) || - (origin != klass && SINGLETON_DUMP_UNABLE_P(origin))) { - rb_raise(rb_eTypeError, "singleton can't be dumped"); - } - klass = RCLASS_SUPER(klass); + if (check && RCLASS_SINGLETON_P(klass)) { + VALUE origin = RCLASS_ORIGIN(klass); + if (SINGLETON_DUMP_UNABLE_P(klass) || + (origin != klass && SINGLETON_DUMP_UNABLE_P(origin))) { + rb_raise(rb_eTypeError, "singleton can't be dumped"); + } + klass = RCLASS_SUPER(klass); } while (BUILTIN_TYPE(klass) == T_ICLASS) { if (!FL_TEST(klass, RICLASS_IS_ORIGIN) || @@ -550,7 +560,7 @@ w_class(char type, VALUE obj, struct dump_arg *arg, int check) VALUE klass; if (arg->compat_tbl && - st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) { + st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) { obj = (VALUE)real_obj; } klass = CLASS_OF(obj); @@ -568,12 +578,31 @@ w_uclass(VALUE obj, VALUE super, struct dump_arg *arg) w_extended(klass, arg, TRUE); klass = rb_class_real(klass); if (klass != super) { - w_byte(TYPE_UCLASS, arg); - w_unique(class2path(klass), arg); + w_byte(TYPE_UCLASS, arg); + w_unique(class2path(klass), arg); } } -#define to_be_skipped_id(id) (id == rb_id_encoding() || id == s_encoding_short || id == s_ruby2_keywords_flag || !rb_id2str(id)) +static bool +rb_hash_ruby2_keywords_p(VALUE obj) +{ + return (RHASH(obj)->basic.flags & RHASH_PASS_AS_KEYWORDS) != 0; +} + +static void +rb_hash_ruby2_keywords(VALUE obj) +{ + RHASH(obj)->basic.flags |= RHASH_PASS_AS_KEYWORDS; +} + +static inline bool +to_be_skipped_id(const ID id) +{ + if (id == s_encoding_short) return true; + if (id == s_ruby2_keywords_flag) return true; + if (id == rb_id_encoding()) return true; + return !rb_id2str(id); +} struct w_ivar_arg { struct dump_call_arg *dump; @@ -581,28 +610,22 @@ struct w_ivar_arg { }; static int -w_obj_each(st_data_t key, st_data_t val, st_data_t a) +w_obj_each(ID id, VALUE value, st_data_t a) { - ID id = (ID)key; - VALUE value = (VALUE)val; struct w_ivar_arg *ivarg = (struct w_ivar_arg *)a; struct dump_call_arg *arg = ivarg->dump; if (to_be_skipped_id(id)) { if (id == s_encoding_short) { - rb_warn("instance variable `"name_s_encoding_short"' on class %"PRIsVALUE" is not dumped", + rb_warn("instance variable '"name_s_encoding_short"' on class %"PRIsVALUE" is not dumped", CLASS_OF(arg->obj)); } if (id == s_ruby2_keywords_flag) { - rb_warn("instance variable `"name_s_ruby2_keywords_flag"' on class %"PRIsVALUE" is not dumped", + rb_warn("instance variable '"name_s_ruby2_keywords_flag"' on class %"PRIsVALUE" is not dumped", CLASS_OF(arg->obj)); } return ST_CONTINUE; } - if (!ivarg->num_ivar) { - rb_raise(rb_eRuntimeError, "instance variable added to %"PRIsVALUE" instance", - CLASS_OF(arg->obj)); - } --ivarg->num_ivar; w_symbol(ID2SYM(id), arg->arg); w_object(value, arg->arg, arg->limit); @@ -610,10 +633,11 @@ w_obj_each(st_data_t key, st_data_t val, st_data_t a) } static int -obj_count_ivars(st_data_t key, st_data_t val, st_data_t a) +obj_count_ivars(ID id, VALUE val, st_data_t a) { - ID id = (ID)key; - if (!to_be_skipped_id(id)) ++*(st_index_t *)a; + if (!to_be_skipped_id(id) && UNLIKELY(!++*(st_index_t *)a)) { + rb_raise(rb_eRuntimeError, "too many instance variables"); + } return ST_CONTINUE; } @@ -659,7 +683,7 @@ w_encoding(VALUE encname, struct dump_call_arg *arg) case Qfalse: case Qtrue: w_symbol(ID2SYM(s_encoding_short), arg->arg); - w_object(encname, arg->arg, limit); + w_object(encname, arg->arg, limit); return 1; case Qnil: return 0; @@ -672,37 +696,50 @@ w_encoding(VALUE encname, struct dump_call_arg *arg) static st_index_t has_ivars(VALUE obj, VALUE encname, VALUE *ivobj) { - st_index_t enc = !NIL_P(encname); - st_index_t num = 0; - st_index_t ruby2_keywords_flag = 0; + st_index_t num = !NIL_P(encname); if (SPECIAL_CONST_P(obj)) goto generic; switch (BUILTIN_TYPE(obj)) { case T_OBJECT: case T_CLASS: case T_MODULE: - break; /* counted elsewhere */ + break; /* counted elsewhere */ case T_HASH: - ruby2_keywords_flag = RHASH(obj)->basic.flags & RHASH_PASS_AS_KEYWORDS ? 1 : 0; + if (rb_hash_ruby2_keywords_p(obj)) ++num; /* fall through */ default: generic: - rb_ivar_foreach(obj, obj_count_ivars, (st_data_t)&num); - if (ruby2_keywords_flag || num) *ivobj = obj; + rb_ivar_foreach(obj, obj_count_ivars, (st_data_t)&num); + if (num) *ivobj = obj; } - return num + enc + ruby2_keywords_flag; + return num; } static void w_ivar_each(VALUE obj, st_index_t num, struct dump_call_arg *arg) { + shape_id_t shape_id = rb_shape_get_shape_id(arg->obj); struct w_ivar_arg ivarg = {arg, num}; if (!num) return; rb_ivar_foreach(obj, w_obj_each, (st_data_t)&ivarg); - if (ivarg.num_ivar) { - rb_raise(rb_eRuntimeError, "instance variable removed from %"PRIsVALUE" instance", - CLASS_OF(arg->obj)); + + if (shape_id != rb_shape_get_shape_id(arg->obj)) { + rb_shape_t * expected_shape = rb_shape_get_shape_by_id(shape_id); + rb_shape_t * actual_shape = rb_shape_get_shape(arg->obj); + + // If the shape tree got _shorter_ then we probably removed an IV + // If the shape tree got longer, then we probably added an IV. + // The exception message might not be accurate when someone adds and + // removes the same number of IVs, but they will still get an exception + if (rb_shape_depth(expected_shape) > rb_shape_depth(actual_shape)) { + rb_raise(rb_eRuntimeError, "instance variable removed from %"PRIsVALUE" instance", + CLASS_OF(arg->obj)); + } + else { + rb_raise(rb_eRuntimeError, "instance variable added to %"PRIsVALUE" instance", + CLASS_OF(arg->obj)); + } } } @@ -711,14 +748,14 @@ w_ivar(st_index_t num, VALUE ivobj, VALUE encname, struct dump_call_arg *arg) { w_long(num, arg->arg); num -= w_encoding(encname, arg); - if (RB_TYPE_P(ivobj, T_HASH) && (RHASH(ivobj)->basic.flags & RHASH_PASS_AS_KEYWORDS)) { + if (RB_TYPE_P(ivobj, T_HASH) && rb_hash_ruby2_keywords_p(ivobj)) { int limit = arg->limit; if (limit >= 0) ++limit; w_symbol(ID2SYM(s_ruby2_keywords_flag), arg->arg); - w_object(Qtrue, arg->arg, limit); + w_object(Qtrue, arg->arg, limit); num--; } - if (ivobj != Qundef && num) { + if (!UNDEF_P(ivobj) && num) { w_ivar_each(ivobj, num, arg); } } @@ -733,6 +770,62 @@ w_objivar(VALUE obj, struct dump_call_arg *arg) w_ivar_each(obj, num, arg); } +#if SIZEOF_LONG > 4 +// Optimized dump for fixnum larger than 31-bits +static void +w_bigfixnum(VALUE obj, struct dump_arg *arg) +{ + RUBY_ASSERT(FIXNUM_P(obj)); + + w_byte(TYPE_BIGNUM, arg); + +#if SIZEOF_LONG == SIZEOF_VALUE + long num, slen_num; + num = FIX2LONG(obj); +#else + long long num, slen_num; + num = NUM2LL(obj); +#endif + + char sign = num < 0 ? '-' : '+'; + w_byte(sign, arg); + + // Guaranteed not to overflow, as FIXNUM is 1-bit less than long + if (num < 0) num = -num; + + // calculate the size in shorts + int slen = 0; + { + slen_num = num; + while (slen_num) { + slen++; + slen_num = SHORTDN(slen_num); + } + } + + RUBY_ASSERT(slen > 0 && slen <= SIZEOF_LONG / 2); + + w_long((long)slen, arg); + + for (int i = 0; i < slen; i++) { + w_short(num & SHORTMASK, arg); + num = SHORTDN(num); + } + + // We aren't adding this object to the link table, but we need to increment + // the index. + arg->num_entries++; + + RUBY_ASSERT(num == 0); +} +#endif + +static void +w_remember(VALUE obj, struct dump_arg *arg) +{ + st_add_direct(arg->data, obj, arg->num_entries++); +} + static void w_object(VALUE obj, struct dump_arg *arg, int limit) { @@ -743,97 +836,99 @@ w_object(VALUE obj, struct dump_arg *arg, int limit) VALUE encname = Qnil; if (limit == 0) { - rb_raise(rb_eArgError, "exceed depth limit"); - } - - if (limit > 0) limit--; - c_arg.limit = limit; - c_arg.arg = arg; - c_arg.obj = obj; - - if (st_lookup(arg->data, obj, &num)) { - w_byte(TYPE_LINK, arg); - w_long((long)num, arg); - return; + rb_raise(rb_eArgError, "exceed depth limit"); } - if (obj == Qnil) { - w_byte(TYPE_NIL, arg); + if (NIL_P(obj)) { + w_byte(TYPE_NIL, arg); } else if (obj == Qtrue) { - w_byte(TYPE_TRUE, arg); + w_byte(TYPE_TRUE, arg); } else if (obj == Qfalse) { - w_byte(TYPE_FALSE, arg); + w_byte(TYPE_FALSE, arg); } else if (FIXNUM_P(obj)) { #if SIZEOF_LONG <= 4 - w_byte(TYPE_FIXNUM, arg); - w_long(FIX2INT(obj), arg); + w_byte(TYPE_FIXNUM, arg); + w_long(FIX2INT(obj), arg); #else - if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) { - w_byte(TYPE_FIXNUM, arg); - w_long(FIX2LONG(obj), arg); - } - else { - w_object(rb_int2big(FIX2LONG(obj)), arg, limit); - } + if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) { + w_byte(TYPE_FIXNUM, arg); + w_long(FIX2LONG(obj), arg); + } + else { + w_bigfixnum(obj, arg); + } #endif } else if (SYMBOL_P(obj)) { - w_symbol(obj, arg); - } - else if (FLONUM_P(obj)) { - st_add_direct(arg->data, obj, arg->data->num_entries); - w_byte(TYPE_FLOAT, arg); - w_float(RFLOAT_VALUE(obj), arg); + w_symbol(obj, arg); } else { - VALUE v; - - if (!RBASIC_CLASS(obj)) { - rb_raise(rb_eTypeError, "can't dump internal %s", - rb_builtin_type_name(BUILTIN_TYPE(obj))); - } - - if (rb_obj_respond_to(obj, s_mdump, TRUE)) { - st_add_direct(arg->data, obj, arg->data->num_entries); - - v = dump_funcall(arg, obj, s_mdump, 0, 0); - w_class(TYPE_USRMARSHAL, obj, arg, FALSE); - w_object(v, arg, limit); - return; - } - if (rb_obj_respond_to(obj, s_dump, TRUE)) { - VALUE ivobj2 = Qundef; - st_index_t hasiv2; - VALUE encname2; - - v = INT2NUM(limit); - v = dump_funcall(arg, obj, s_dump, 1, &v); - if (!RB_TYPE_P(v, T_STRING)) { - rb_raise(rb_eTypeError, "_dump() must return string"); - } - hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj); - hasiv2 = has_ivars(v, (encname2 = encoding_name(v, arg)), &ivobj2); - if (hasiv2) { - hasiv = hasiv2; - ivobj = ivobj2; - encname = encname2; - } - if (hasiv) w_byte(TYPE_IVAR, arg); - w_class(TYPE_USERDEF, obj, arg, FALSE); - w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg); - if (hasiv) { - w_ivar(hasiv, ivobj, encname, &c_arg); - } - st_add_direct(arg->data, obj, arg->data->num_entries); - return; - } - - st_add_direct(arg->data, obj, arg->data->num_entries); - - hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj); + if (st_lookup(arg->data, obj, &num)) { + w_byte(TYPE_LINK, arg); + w_long((long)num, arg); + return; + } + + if (limit > 0) limit--; + c_arg.limit = limit; + c_arg.arg = arg; + c_arg.obj = obj; + + if (FLONUM_P(obj)) { + w_remember(obj, arg); + w_byte(TYPE_FLOAT, arg); + w_float(RFLOAT_VALUE(obj), arg); + return; + } + + VALUE v; + + if (!RBASIC_CLASS(obj)) { + rb_raise(rb_eTypeError, "can't dump internal %s", + rb_builtin_type_name(BUILTIN_TYPE(obj))); + } + + if (rb_obj_respond_to(obj, s_mdump, TRUE)) { + w_remember(obj, arg); + + v = dump_funcall(arg, obj, s_mdump, 0, 0); + w_class(TYPE_USRMARSHAL, obj, arg, FALSE); + w_object(v, arg, limit); + return; + } + if (rb_obj_respond_to(obj, s_dump, TRUE)) { + VALUE ivobj2 = Qundef; + st_index_t hasiv2; + VALUE encname2; + + v = INT2NUM(limit); + v = dump_funcall(arg, obj, s_dump, 1, &v); + if (!RB_TYPE_P(v, T_STRING)) { + rb_raise(rb_eTypeError, "_dump() must return string"); + } + hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj); + hasiv2 = has_ivars(v, (encname2 = encoding_name(v, arg)), &ivobj2); + if (hasiv2) { + hasiv = hasiv2; + ivobj = ivobj2; + encname = encname2; + } + if (hasiv) w_byte(TYPE_IVAR, arg); + w_class(TYPE_USERDEF, obj, arg, FALSE); + w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg); + if (hasiv) { + w_ivar(hasiv, ivobj, encname, &c_arg); + } + w_remember(obj, arg); + return; + } + + w_remember(obj, arg); + + hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj); { st_data_t compat_data; rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass); @@ -847,79 +942,79 @@ w_object(VALUE obj, struct dump_arg *arg, int limit) arg->compat_tbl = rb_init_identtable(); } st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj); - if (obj != real_obj && ivobj == Qundef) hasiv = 0; + if (obj != real_obj && UNDEF_P(ivobj)) hasiv = 0; } } - if (hasiv) w_byte(TYPE_IVAR, arg); - - switch (BUILTIN_TYPE(obj)) { - case T_CLASS: - if (FL_TEST(obj, FL_SINGLETON)) { - rb_raise(rb_eTypeError, "singleton class can't be dumped"); - } - w_byte(TYPE_CLASS, arg); - { - VALUE path = class2path(obj); - w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg); - RB_GC_GUARD(path); - } - break; - - case T_MODULE: - w_byte(TYPE_MODULE, arg); - { - VALUE path = class2path(obj); - w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg); - RB_GC_GUARD(path); - } - break; - - case T_FLOAT: - w_byte(TYPE_FLOAT, arg); - w_float(RFLOAT_VALUE(obj), arg); - break; - - case T_BIGNUM: - w_byte(TYPE_BIGNUM, arg); - { - char sign = BIGNUM_SIGN(obj) ? '+' : '-'; - size_t len = BIGNUM_LEN(obj); - size_t slen; + if (hasiv) w_byte(TYPE_IVAR, arg); + + switch (BUILTIN_TYPE(obj)) { + case T_CLASS: + if (FL_TEST(obj, FL_SINGLETON)) { + rb_raise(rb_eTypeError, "singleton class can't be dumped"); + } + w_byte(TYPE_CLASS, arg); + { + VALUE path = class2path(obj); + w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg); + RB_GC_GUARD(path); + } + break; + + case T_MODULE: + w_byte(TYPE_MODULE, arg); + { + VALUE path = class2path(obj); + w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg); + RB_GC_GUARD(path); + } + break; + + case T_FLOAT: + w_byte(TYPE_FLOAT, arg); + w_float(RFLOAT_VALUE(obj), arg); + break; + + case T_BIGNUM: + w_byte(TYPE_BIGNUM, arg); + { + char sign = BIGNUM_SIGN(obj) ? '+' : '-'; + size_t len = BIGNUM_LEN(obj); + size_t slen; size_t j; - BDIGIT *d = BIGNUM_DIGITS(obj); + BDIGIT *d = BIGNUM_DIGITS(obj); slen = SHORTLEN(len); if (LONG_MAX < slen) { rb_raise(rb_eTypeError, "too big Bignum can't be dumped"); } - w_byte(sign, arg); - w_long((long)slen, arg); + w_byte(sign, arg); + w_long((long)slen, arg); for (j = 0; j < len; j++) { #if SIZEOF_BDIGIT > SIZEOF_SHORT - BDIGIT num = *d; - int i; + BDIGIT num = *d; + int i; - for (i=0; i<SIZEOF_BDIGIT; i+=SIZEOF_SHORT) { - w_short(num & SHORTMASK, arg); - num = SHORTDN(num); + for (i=0; i<SIZEOF_BDIGIT; i+=SIZEOF_SHORT) { + w_short(num & SHORTMASK, arg); + num = SHORTDN(num); if (j == len - 1 && num == 0) break; - } + } #else - w_short(*d, arg); + w_short(*d, arg); #endif - d++; - } - } - break; - - case T_STRING: - w_uclass(obj, rb_cString, arg); - w_byte(TYPE_STRING, arg); - w_bytes(RSTRING_PTR(obj), RSTRING_LEN(obj), arg); - break; - - case T_REGEXP: + d++; + } + } + break; + + case T_STRING: + w_uclass(obj, rb_cString, arg); + w_byte(TYPE_STRING, arg); + w_bytes(RSTRING_PTR(obj), RSTRING_LEN(obj), arg); + break; + + case T_REGEXP: w_uclass(obj, rb_cRegexp, arg); w_byte(TYPE_REGEXP, arg); { @@ -927,87 +1022,91 @@ w_object(VALUE obj, struct dump_arg *arg, int limit) w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg); w_byte((char)opts, arg); } - break; - - case T_ARRAY: - w_uclass(obj, rb_cArray, arg); - w_byte(TYPE_ARRAY, arg); - { - long i, len = RARRAY_LEN(obj); - - w_long(len, arg); - for (i=0; i<RARRAY_LEN(obj); i++) { - w_object(RARRAY_AREF(obj, i), arg, limit); - if (len != RARRAY_LEN(obj)) { - rb_raise(rb_eRuntimeError, "array modified during dump"); - } - } - } - break; - - case T_HASH: - w_uclass(obj, rb_cHash, arg); - if (NIL_P(RHASH_IFNONE(obj))) { - w_byte(TYPE_HASH, arg); - } + break; + + case T_ARRAY: + w_uclass(obj, rb_cArray, arg); + w_byte(TYPE_ARRAY, arg); + { + long i, len = RARRAY_LEN(obj); + + w_long(len, arg); + for (i=0; i<RARRAY_LEN(obj); i++) { + w_object(RARRAY_AREF(obj, i), arg, limit); + if (len != RARRAY_LEN(obj)) { + rb_raise(rb_eRuntimeError, "array modified during dump"); + } + } + } + break; + + case T_HASH: + w_uclass(obj, rb_cHash, arg); + if (rb_hash_compare_by_id_p(obj)) { + w_byte(TYPE_UCLASS, arg); + w_symbol(rb_sym_intern_ascii_cstr("Hash"), arg); + } + if (NIL_P(RHASH_IFNONE(obj))) { + w_byte(TYPE_HASH, arg); + } else if (FL_TEST(obj, RHASH_PROC_DEFAULT)) { - rb_raise(rb_eTypeError, "can't dump hash with default proc"); - } - else { - w_byte(TYPE_HASH_DEF, arg); - } + rb_raise(rb_eTypeError, "can't dump hash with default proc"); + } + else { + w_byte(TYPE_HASH_DEF, arg); + } w_long(rb_hash_size_num(obj), arg); - rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg); - if (!NIL_P(RHASH_IFNONE(obj))) { - w_object(RHASH_IFNONE(obj), arg, limit); - } - break; - - case T_STRUCT: - w_class(TYPE_STRUCT, obj, arg, TRUE); - { - long len = RSTRUCT_LEN(obj); - VALUE mem; - long i; - - w_long(len, arg); - mem = rb_struct_members(obj); - for (i=0; i<len; i++) { - w_symbol(RARRAY_AREF(mem, i), arg); - w_object(RSTRUCT_GET(obj, i), arg, limit); - } - } - break; - - case T_OBJECT: - w_class(TYPE_OBJECT, obj, arg, TRUE); - w_objivar(obj, &c_arg); - break; - - case T_DATA: - { - VALUE v; - - if (!rb_obj_respond_to(obj, s_dump_data, TRUE)) { - rb_raise(rb_eTypeError, - "no _dump_data is defined for class %"PRIsVALUE, - rb_obj_class(obj)); - } - v = dump_funcall(arg, obj, s_dump_data, 0, 0); - w_class(TYPE_DATA, obj, arg, TRUE); - w_object(v, arg, limit); - } - break; - - default: - rb_raise(rb_eTypeError, "can't dump %"PRIsVALUE, - rb_obj_class(obj)); - break; - } - RB_GC_GUARD(obj); + rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg); + if (!NIL_P(RHASH_IFNONE(obj))) { + w_object(RHASH_IFNONE(obj), arg, limit); + } + break; + + case T_STRUCT: + w_class(TYPE_STRUCT, obj, arg, TRUE); + { + long len = RSTRUCT_LEN(obj); + VALUE mem; + long i; + + w_long(len, arg); + mem = rb_struct_members(obj); + for (i=0; i<len; i++) { + w_symbol(RARRAY_AREF(mem, i), arg); + w_object(RSTRUCT_GET(obj, i), arg, limit); + } + } + break; + + case T_OBJECT: + w_class(TYPE_OBJECT, obj, arg, TRUE); + w_objivar(obj, &c_arg); + break; + + case T_DATA: + { + VALUE v; + + if (!rb_obj_respond_to(obj, s_dump_data, TRUE)) { + rb_raise(rb_eTypeError, + "no _dump_data is defined for class %"PRIsVALUE, + rb_obj_class(obj)); + } + v = dump_funcall(arg, obj, s_dump_data, 0, 0); + w_class(TYPE_DATA, obj, arg, TRUE); + w_object(v, arg, limit); + } + break; + + default: + rb_raise(rb_eTypeError, "can't dump %"PRIsVALUE, + rb_obj_class(obj)); + break; + } + RB_GC_GUARD(obj); } if (hasiv) { - w_ivar(hasiv, ivobj, encname, &c_arg); + w_ivar(hasiv, ivobj, encname, &c_arg); } } @@ -1019,13 +1118,14 @@ clear_dump_arg(struct dump_arg *arg) arg->symbols = 0; st_free_table(arg->data); arg->data = 0; + arg->num_entries = 0; if (arg->compat_tbl) { - st_free_table(arg->compat_tbl); - arg->compat_tbl = 0; + st_free_table(arg->compat_tbl); + arg->compat_tbl = 0; } if (arg->encodings) { - st_free_table(arg->encodings); - arg->encodings = 0; + st_free_table(arg->encodings); + arg->encodings = 0; } } @@ -1079,14 +1179,14 @@ marshal_dump(int argc, VALUE *argv, VALUE _) port = Qnil; rb_scan_args(argc, argv, "12", &obj, &a1, &a2); if (argc == 3) { - if (!NIL_P(a2)) limit = NUM2INT(a2); - if (NIL_P(a1)) io_needed(); - port = a1; + if (!NIL_P(a2)) limit = NUM2INT(a2); + if (NIL_P(a1)) io_needed(); + port = a1; } else if (argc == 2) { - if (FIXNUM_P(a1)) limit = FIX2INT(a1); - else if (NIL_P(a1)) io_needed(); - else port = a1; + if (FIXNUM_P(a1)) limit = FIX2INT(a1); + else if (NIL_P(a1)) io_needed(); + else port = a1; } return rb_marshal_dump_limited(obj, port, limit); } @@ -1101,18 +1201,19 @@ rb_marshal_dump_limited(VALUE obj, VALUE port, int limit) arg->dest = 0; arg->symbols = st_init_numtable(); arg->data = rb_init_identtable(); + arg->num_entries = 0; arg->compat_tbl = 0; arg->encodings = 0; arg->str = rb_str_buf_new(0); if (!NIL_P(port)) { - if (!rb_respond_to(port, s_write)) { - io_needed(); - } - arg->dest = port; - dump_check_funcall(arg, port, s_binmode, 0, 0); + if (!rb_respond_to(port, s_write)) { + io_needed(); + } + arg->dest = port; + dump_check_funcall(arg, port, s_binmode, 0, 0); } else { - port = arg->str; + port = arg->str; } w_byte(MARSHAL_MAJOR, arg); @@ -1120,8 +1221,8 @@ rb_marshal_dump_limited(VALUE obj, VALUE port, int limit) w_object(obj, arg, limit); if (arg->dest) { - rb_io_write(arg->dest, arg->str); - rb_str_resize(arg->str, 0); + rb_io_write(arg->dest, arg->str); + rb_str_resize(arg->str, 0); } clear_dump_arg(arg); RB_GC_GUARD(wrapper); @@ -1137,8 +1238,10 @@ struct load_arg { long offset; st_table *symbols; st_table *data; + st_table *partial_objects; VALUE proc; st_table *compat_tbl; + bool freeze; }; static VALUE @@ -1146,7 +1249,7 @@ check_load_arg(VALUE ret, struct load_arg *arg, const char *name) { if (!arg->symbols) { rb_raise(rb_eRuntimeError, "Marshal.load reentered at %s", - name); + name); } return ret; } @@ -1163,6 +1266,7 @@ mark_load_arg(void *ptr) return; rb_mark_tbl(p->symbols); rb_mark_tbl(p->data); + rb_mark_tbl(p->partial_objects); rb_mark_hash(p->compat_tbl); } @@ -1170,19 +1274,24 @@ static void free_load_arg(void *ptr) { clear_load_arg(ptr); - xfree(ptr); } static size_t memsize_load_arg(const void *ptr) { - return sizeof(struct load_arg); + const struct load_arg *p = (struct load_arg *)ptr; + size_t memsize = 0; + if (p->symbols) memsize += rb_st_memsize(p->symbols); + if (p->data) memsize += rb_st_memsize(p->data); + if (p->partial_objects) memsize += rb_st_memsize(p->partial_objects); + if (p->compat_tbl) memsize += rb_st_memsize(p->compat_tbl); + return memsize; } static const rb_data_type_t load_arg_data = { "load_arg", {mark_load_arg, free_load_arg, memsize_load_arg,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE }; #define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg)) @@ -1209,15 +1318,15 @@ static unsigned char r_byte1_buffered(struct load_arg *arg) { if (arg->buflen == 0) { - long readable = arg->readable < BUFSIZ ? arg->readable : BUFSIZ; - VALUE str, n = LONG2NUM(readable); + long readable = arg->readable < BUFSIZ ? arg->readable : BUFSIZ; + VALUE str, n = LONG2NUM(readable); - str = load_funcall(arg, arg->src, s_read, 1, &n); - if (NIL_P(str)) too_short(); - StringValue(str); - memcpy(arg->buf, RSTRING_PTR(str), RSTRING_LEN(str)); - arg->offset = 0; - arg->buflen = RSTRING_LEN(str); + str = load_funcall(arg, arg->src, s_read, 1, &n); + if (NIL_P(str)) too_short(); + StringValue(str); + memcpy(arg->buf, RSTRING_PTR(str), RSTRING_LEN(str)); + arg->offset = 0; + arg->buflen = RSTRING_LEN(str); } arg->buflen--; return arg->buf[arg->offset++]; @@ -1229,22 +1338,22 @@ r_byte(struct load_arg *arg) int c; if (RB_TYPE_P(arg->src, T_STRING)) { - if (RSTRING_LEN(arg->src) > arg->offset) { - c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++]; - } - else { - too_short(); - } + if (RSTRING_LEN(arg->src) > arg->offset) { + c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++]; + } + else { + too_short(); + } } else { - if (arg->readable >0 || arg->buflen > 0) { - c = r_byte1_buffered(arg); - } - else { - VALUE v = load_funcall(arg, arg->src, s_getbyte, 0, 0); - if (NIL_P(v)) rb_eof_error(); - c = (unsigned char)NUM2CHR(v); - } + if (arg->readable >0 || arg->buflen > 0) { + c = r_byte1_buffered(arg); + } + else { + VALUE v = load_funcall(arg, arg->src, s_getbyte, 0, 0); + if (NIL_P(v)) rb_eof_error(); + c = (unsigned char)NUM2CHR(v); + } } return c; } @@ -1255,7 +1364,7 @@ static void long_toobig(int size) { rb_raise(rb_eTypeError, "long too big for this architecture (size " - STRINGIZE(SIZEOF_LONG)", given %d)", size); + STRINGIZE(SIZEOF_LONG)", given %d)", size); } static long @@ -1267,26 +1376,26 @@ r_long(struct load_arg *arg) if (c == 0) return 0; if (c > 0) { - if (4 < c && c < 128) { - return c - 5; - } - if (c > (int)sizeof(long)) long_toobig(c); - x = 0; - for (i=0;i<c;i++) { - x |= (long)r_byte(arg) << (8*i); - } + if (4 < c && c < 128) { + return c - 5; + } + if (c > (int)sizeof(long)) long_toobig(c); + x = 0; + for (i=0;i<c;i++) { + x |= (long)r_byte(arg) << (8*i); + } } else { - if (-129 < c && c < -4) { - return c + 5; - } - c = -c; - if (c > (int)sizeof(long)) long_toobig(c); - x = -1; - for (i=0;i<c;i++) { - x &= ~((long)0xff << (8*i)); - x |= (long)r_byte(arg) << (8*i); - } + if (-129 < c && c < -4) { + return c + 5; + } + c = -c; + if (c > (int)sizeof(long)) long_toobig(c); + x = -1; + for (i=0;i<c;i++) { + x &= ~((long)0xff << (8*i)); + x |= (long)r_byte(arg) << (8*i); + } } return x; } @@ -1323,39 +1432,39 @@ r_bytes1_buffered(long len, struct load_arg *arg) VALUE str; if (len <= arg->buflen) { - str = rb_str_new(arg->buf+arg->offset, len); - arg->offset += len; - arg->buflen -= len; + str = rb_str_new(arg->buf+arg->offset, len); + arg->offset += len; + arg->buflen -= len; } else { - long buflen = arg->buflen; - long readable = arg->readable + 1; - long tmp_len, read_len, need_len = len - buflen; - VALUE tmp, n; + long buflen = arg->buflen; + long readable = arg->readable + 1; + long tmp_len, read_len, need_len = len - buflen; + VALUE tmp, n; - readable = readable < BUFSIZ ? readable : BUFSIZ; - read_len = need_len > readable ? need_len : readable; - n = LONG2NUM(read_len); - tmp = load_funcall(arg, arg->src, s_read, 1, &n); - if (NIL_P(tmp)) too_short(); - StringValue(tmp); + readable = readable < BUFSIZ ? readable : BUFSIZ; + read_len = need_len > readable ? need_len : readable; + n = LONG2NUM(read_len); + tmp = load_funcall(arg, arg->src, s_read, 1, &n); + if (NIL_P(tmp)) too_short(); + StringValue(tmp); - tmp_len = RSTRING_LEN(tmp); + tmp_len = RSTRING_LEN(tmp); - if (tmp_len < need_len) too_short(); + if (tmp_len < need_len) too_short(); - str = rb_str_new(arg->buf+arg->offset, buflen); - rb_str_cat(str, RSTRING_PTR(tmp), need_len); + str = rb_str_new(arg->buf+arg->offset, buflen); + rb_str_cat(str, RSTRING_PTR(tmp), need_len); - if (tmp_len > need_len) { - buflen = tmp_len - need_len; - memcpy(arg->buf, RSTRING_PTR(tmp)+need_len, buflen); - arg->buflen = buflen; - } - else { - arg->buflen = 0; - } - arg->offset = 0; + if (tmp_len > need_len) { + buflen = tmp_len - need_len; + memcpy(arg->buf, RSTRING_PTR(tmp)+need_len, buflen); + arg->buflen = buflen; + } + else { + arg->buflen = 0; + } + arg->offset = 0; } return str; @@ -1370,21 +1479,21 @@ r_bytes0(long len, struct load_arg *arg) if (len == 0) return rb_str_new(0, 0); if (RB_TYPE_P(arg->src, T_STRING)) { - if (RSTRING_LEN(arg->src) - arg->offset >= len) { - str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len); - arg->offset += len; - } - else { - too_short(); - } + if (RSTRING_LEN(arg->src) - arg->offset >= len) { + str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len); + arg->offset += len; + } + else { + too_short(); + } } else { - if (arg->readable > 0 || arg->buflen > 0) { - str = r_bytes1_buffered(len, arg); - } - else { - str = r_bytes1(len, arg); - } + if (arg->readable > 0 || arg->buflen > 0) { + str = r_bytes1_buffered(len, arg); + } + else { + str = r_bytes1(len, arg); + } } return str; } @@ -1406,30 +1515,33 @@ sym2encidx(VALUE sym, VALUE val) RSTRING_GETMEM(sym, p, l); if (l <= 0) return -1; if (name_equal(name_encoding, sizeof(name_encoding), p, l)) { - int idx = rb_enc_find_index(StringValueCStr(val)); - return idx; + int idx = rb_enc_find_index(StringValueCStr(val)); + return idx; } if (name_equal(name_s_encoding_short, rb_strlen_lit(name_s_encoding_short), p, l)) { - if (val == Qfalse) return rb_usascii_encindex(); - else if (val == Qtrue) return rb_utf8_encindex(); - /* bogus ignore */ + if (val == Qfalse) return rb_usascii_encindex(); + else if (val == Qtrue) return rb_utf8_encindex(); + /* bogus ignore */ } return -1; } static int -ruby2_keywords_flag_check(VALUE sym) +symname_equal(VALUE sym, const char *name, size_t nlen) { const char *p; long l; + if (rb_enc_get_index(sym) != ENCINDEX_US_ASCII) return 0; RSTRING_GETMEM(sym, p, l); - if (l <= 0) return 0; - if (name_equal(name_s_ruby2_keywords_flag, rb_strlen_lit(name_s_ruby2_keywords_flag), p, 1)) { - return 1; - } - return 0; + return name_equal(name, nlen, p, l); } +#define BUILD_ASSERT_POSITIVE(n) \ + /* make 0 negative to workaround the "zero size array" GCC extension, */ \ + ((sizeof(char [2*(ssize_t)(n)-1])+1)/2) /* assuming no overflow */ +#define symname_equal_lit(sym, sym_name) \ + symname_equal(sym, sym_name, BUILD_ASSERT_POSITIVE(rb_strlen_lit(sym_name))) + static VALUE r_symlink(struct load_arg *arg) { @@ -1437,7 +1549,7 @@ r_symlink(struct load_arg *arg) long num = r_long(arg); if (!st_lookup(arg->symbols, num, &sym)) { - rb_raise(rb_eArgError, "bad symbol"); + rb_raise(rb_eArgError, "bad symbol"); } return (VALUE)sym; } @@ -1453,13 +1565,19 @@ r_symreal(struct load_arg *arg, int ivar) if (rb_enc_str_asciionly_p(s)) rb_enc_associate_index(s, ENCINDEX_US_ASCII); st_insert(arg->symbols, (st_data_t)n, (st_data_t)s); if (ivar) { - long num = r_long(arg); - while (num-- > 0) { - sym = r_symbol(arg); - idx = sym2encidx(sym, r_object(arg)); - } + long num = r_long(arg); + while (num-- > 0) { + sym = r_symbol(arg); + idx = sym2encidx(sym, r_object(arg)); + } + } + if (idx > 0) { + rb_enc_associate_index(s, idx); + if (is_broken_string(s)) { + rb_raise(rb_eArgError, "invalid byte sequence in %s: %+"PRIsVALUE, + rb_enc_name(rb_enc_from_index(idx)), s); + } } - if (idx > 0) rb_enc_associate_index(s, idx); return s; } @@ -1472,17 +1590,17 @@ r_symbol(struct load_arg *arg) again: switch ((type = r_byte(arg))) { default: - rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type); + rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type); case TYPE_IVAR: - ivar = 1; - goto again; + ivar = 1; + goto again; case TYPE_SYMBOL: - return r_symreal(arg, ivar); + return r_symreal(arg, ivar); case TYPE_SYMLINK: - if (ivar) { - rb_raise(rb_eArgError, "dump format error (symlink with encoding)"); - } - return r_symlink(arg); + if (ivar) { + rb_raise(rb_eArgError, "dump format error (symlink with encoding)"); + } + return r_symlink(arg); } } @@ -1507,6 +1625,7 @@ r_entry0(VALUE v, st_index_t num, struct load_arg *arg) st_lookup(arg->compat_tbl, v, &real_obj); } st_insert(arg->data, num, real_obj); + st_insert(arg->partial_objects, (st_data_t)real_obj, Qtrue); return v; } @@ -1531,27 +1650,42 @@ static VALUE r_post_proc(VALUE v, struct load_arg *arg) { if (arg->proc) { - v = load_funcall(arg, arg->proc, s_call, 1, &v); + v = load_funcall(arg, arg->proc, s_call, 1, &v); } return v; } static VALUE -r_leave(VALUE v, struct load_arg *arg) +r_leave(VALUE v, struct load_arg *arg, bool partial) { v = r_fixup_compat(v, arg); - v = r_post_proc(v, arg); + if (!partial) { + st_data_t data; + st_data_t key = (st_data_t)v; + st_delete(arg->partial_objects, &key, &data); + if (arg->freeze) { + if (RB_TYPE_P(v, T_MODULE) || RB_TYPE_P(v, T_CLASS)) { + // noop + } + else if (RB_TYPE_P(v, T_STRING)) { + v = rb_str_to_interned_str(v); + } + else { + OBJ_FREEZE(v); + } + } + v = r_post_proc(v, arg); + } return v; } static int -copy_ivar_i(st_data_t key, st_data_t val, st_data_t arg) +copy_ivar_i(ID vid, VALUE value, st_data_t arg) { - VALUE obj = (VALUE)arg, value = (VALUE)val; - ID vid = (ID)key; + VALUE obj = (VALUE)arg; if (!rb_ivar_defined(obj, vid)) - rb_ivar_set(obj, vid, value); + rb_ivar_set(obj, vid, value); return ST_CONTINUE; } @@ -1569,22 +1703,22 @@ r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg) len = r_long(arg); if (len > 0) { - do { - VALUE sym = r_symbol(arg); - VALUE val = r_object(arg); - int idx = sym2encidx(sym, val); - if (idx >= 0) { + do { + VALUE sym = r_symbol(arg); + VALUE val = r_object(arg); + int idx = sym2encidx(sym, val); + if (idx >= 0) { if (rb_enc_capable(obj)) { rb_enc_associate_index(obj, idx); } else { rb_raise(rb_eArgError, "%"PRIsVALUE" is not enc_capable", obj); } - if (has_encoding) *has_encoding = TRUE; - } - else if (ruby2_keywords_flag_check(sym)) { + if (has_encoding) *has_encoding = TRUE; + } + else if (symname_equal_lit(sym, name_s_ruby2_keywords_flag)) { if (RB_TYPE_P(obj, T_HASH)) { - RHASH(obj)->basic.flags |= RHASH_PASS_AS_KEYWORDS; + rb_hash_ruby2_keywords(obj); } else { rb_raise(rb_eArgError, "ruby2_keywords flag is given but %"PRIsVALUE" is not a Hash", obj); @@ -1592,8 +1726,8 @@ r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg) } else { rb_ivar_set(obj, rb_intern_str(sym), val); - } - } while (--len > 0); + } + } while (--len > 0); } } @@ -1603,7 +1737,7 @@ path2class(VALUE path) VALUE v = rb_path_to_class(path); if (!RB_TYPE_P(v, T_CLASS)) { - rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to class", path); + rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to class", path); } return v; } @@ -1614,7 +1748,7 @@ static VALUE must_be_module(VALUE v, VALUE path) { if (!RB_TYPE_P(v, T_MODULE)) { - rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to module", path); + rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to module", path); } return v; } @@ -1630,7 +1764,7 @@ obj_alloc_by_klass(VALUE klass, struct load_arg *arg, VALUE *oldclass) marshal_compat_t *compat = (marshal_compat_t*)data; VALUE real_obj = rb_obj_alloc(klass); VALUE obj = rb_obj_alloc(compat->oldclass); - if (oldclass) *oldclass = compat->oldclass; + if (oldclass) *oldclass = compat->oldclass; if (!arg->compat_tbl) { arg->compat_tbl = rb_init_identtable(); @@ -1653,449 +1787,499 @@ append_extmod(VALUE obj, VALUE extmod) { long i = RARRAY_LEN(extmod); while (i > 0) { - VALUE m = RARRAY_AREF(extmod, --i); - rb_extend_object(obj, m); + VALUE m = RARRAY_AREF(extmod, --i); + rb_extend_object(obj, m); } return obj; } #define prohibit_ivar(type, str) do { \ - if (!ivp || !*ivp) break; \ - rb_raise(rb_eTypeError, \ - "can't override instance variable of "type" `%"PRIsVALUE"'", \ - (str)); \ + if (!ivp || !*ivp) break; \ + rb_raise(rb_eTypeError, \ + "can't override instance variable of "type" '%"PRIsVALUE"'", \ + (str)); \ } while (0) +static VALUE r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int type); + static VALUE -r_object0(struct load_arg *arg, int *ivp, VALUE extmod) +r_object0(struct load_arg *arg, bool partial, int *ivp, VALUE extmod) { - VALUE v = Qnil; int type = r_byte(arg); + return r_object_for(arg, partial, ivp, extmod, type); +} + +static VALUE +r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int type) +{ + VALUE (*hash_new_with_size)(st_index_t) = rb_hash_new_with_size; + VALUE v = Qnil; long id; st_data_t link; switch (type) { case TYPE_LINK: - id = r_long(arg); - if (!st_lookup(arg->data, (st_data_t)id, &link)) { - rb_raise(rb_eArgError, "dump format error (unlinked)"); - } - v = (VALUE)link; - v = r_post_proc(v, arg); - break; + id = r_long(arg); + if (!st_lookup(arg->data, (st_data_t)id, &link)) { + rb_raise(rb_eArgError, "dump format error (unlinked)"); + } + v = (VALUE)link; + if (!st_lookup(arg->partial_objects, (st_data_t)v, &link)) { + v = r_post_proc(v, arg); + } + break; case TYPE_IVAR: { - int ivar = TRUE; - - v = r_object0(arg, &ivar, extmod); - if (ivar) r_ivar(v, NULL, arg); - } - break; + int ivar = TRUE; + v = r_object0(arg, true, &ivar, extmod); + if (ivar) r_ivar(v, NULL, arg); + v = r_leave(v, arg, partial); + } + break; case TYPE_EXTENDED: - { - VALUE path = r_unique(arg); - VALUE m = rb_path_to_class(path); - if (NIL_P(extmod)) extmod = rb_ary_tmp_new(0); - - if (RB_TYPE_P(m, T_CLASS)) { /* prepended */ - VALUE c; - - v = r_object0(arg, 0, Qnil); - c = CLASS_OF(v); - if (c != m || FL_TEST(c, FL_SINGLETON)) { - rb_raise(rb_eArgError, - "prepended class %"PRIsVALUE" differs from class %"PRIsVALUE, - path, rb_class_name(c)); - } - c = rb_singleton_class(v); - while (RARRAY_LEN(extmod) > 0) { - m = rb_ary_pop(extmod); - rb_prepend_module(c, m); - } - } - else { - must_be_module(m, path); - rb_ary_push(extmod, m); - - v = r_object0(arg, 0, extmod); - while (RARRAY_LEN(extmod) > 0) { - m = rb_ary_pop(extmod); - rb_extend_object(v, m); - } - } - } - break; + { + VALUE path = r_unique(arg); + VALUE m = rb_path_to_class(path); + if (NIL_P(extmod)) extmod = rb_ary_hidden_new(0); + + if (RB_TYPE_P(m, T_CLASS)) { /* prepended */ + VALUE c; + + v = r_object0(arg, true, 0, Qnil); + c = CLASS_OF(v); + if (c != m || FL_TEST(c, FL_SINGLETON)) { + rb_raise(rb_eArgError, + "prepended class %"PRIsVALUE" differs from class %"PRIsVALUE, + path, rb_class_name(c)); + } + c = rb_singleton_class(v); + while (RARRAY_LEN(extmod) > 0) { + m = rb_ary_pop(extmod); + rb_prepend_module(c, m); + } + } + else { + must_be_module(m, path); + rb_ary_push(extmod, m); + + v = r_object0(arg, true, 0, extmod); + while (RARRAY_LEN(extmod) > 0) { + m = rb_ary_pop(extmod); + rb_extend_object(v, m); + } + } + v = r_leave(v, arg, partial); + } + break; case TYPE_UCLASS: - { - VALUE c = path2class(r_unique(arg)); - - if (FL_TEST(c, FL_SINGLETON)) { - rb_raise(rb_eTypeError, "singleton can't be loaded"); - } - v = r_object0(arg, 0, extmod); - if (rb_special_const_p(v) || RB_TYPE_P(v, T_OBJECT) || RB_TYPE_P(v, T_CLASS)) { + { + VALUE c = path2class(r_unique(arg)); + + if (FL_TEST(c, FL_SINGLETON)) { + rb_raise(rb_eTypeError, "singleton can't be loaded"); + } + type = r_byte(arg); + if ((c == rb_cHash) && + /* Hack for compare_by_identify */ + (type == TYPE_HASH || type == TYPE_HASH_DEF)) { + hash_new_with_size = rb_ident_hash_new_with_size; + goto type_hash; + } + v = r_object_for(arg, partial, 0, extmod, type); + if (RB_SPECIAL_CONST_P(v) || RB_TYPE_P(v, T_OBJECT) || RB_TYPE_P(v, T_CLASS)) { goto format_error; - } - if (RB_TYPE_P(v, T_MODULE) || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) { - VALUE tmp = rb_obj_alloc(c); + } + if (RB_TYPE_P(v, T_MODULE) || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) { + VALUE tmp = rb_obj_alloc(c); - if (TYPE(v) != TYPE(tmp)) goto format_error; - } - RBASIC_SET_CLASS(v, c); - } - break; + if (TYPE(v) != TYPE(tmp)) goto format_error; + } + RBASIC_SET_CLASS(v, c); + } + break; format_error: rb_raise(rb_eArgError, "dump format error (user class)"); case TYPE_NIL: - v = Qnil; - v = r_leave(v, arg); - break; + v = Qnil; + v = r_leave(v, arg, false); + break; case TYPE_TRUE: - v = Qtrue; - v = r_leave(v, arg); - break; + v = Qtrue; + v = r_leave(v, arg, false); + break; case TYPE_FALSE: - v = Qfalse; - v = r_leave(v, arg); - break; + v = Qfalse; + v = r_leave(v, arg, false); + break; case TYPE_FIXNUM: - { - long i = r_long(arg); - v = LONG2FIX(i); - } - v = r_leave(v, arg); - break; + { + long i = r_long(arg); + v = LONG2FIX(i); + } + v = r_leave(v, arg, false); + break; case TYPE_FLOAT: - { - double d; - VALUE str = r_bytes(arg); - const char *ptr = RSTRING_PTR(str); - - if (strcmp(ptr, "nan") == 0) { - d = nan(""); - } - else if (strcmp(ptr, "inf") == 0) { - d = HUGE_VAL; - } - else if (strcmp(ptr, "-inf") == 0) { - d = -HUGE_VAL; - } - else { - char *e; - d = strtod(ptr, &e); - d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr)); - } - v = DBL2NUM(d); - v = r_entry(v, arg); - v = r_leave(v, arg); - } - break; + { + double d; + VALUE str = r_bytes(arg); + const char *ptr = RSTRING_PTR(str); + + if (strcmp(ptr, "nan") == 0) { + d = nan(""); + } + else if (strcmp(ptr, "inf") == 0) { + d = HUGE_VAL; + } + else if (strcmp(ptr, "-inf") == 0) { + d = -HUGE_VAL; + } + else { + char *e; + d = strtod(ptr, &e); + d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr)); + } + v = DBL2NUM(d); + v = r_entry(v, arg); + v = r_leave(v, arg, false); + } + break; case TYPE_BIGNUM: - { - long len; - VALUE data; + { + long len; + VALUE data; int sign; - sign = r_byte(arg); - len = r_long(arg); - data = r_bytes0(len * 2, arg); - v = rb_integer_unpack(RSTRING_PTR(data), len, 2, 0, - INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0)); - rb_str_resize(data, 0L); - v = r_entry(v, arg); - v = r_leave(v, arg); - } - break; + sign = r_byte(arg); + len = r_long(arg); + + if (SIZEOF_VALUE >= 8 && len <= 4) { + // Representable within uintptr, likely FIXNUM + VALUE num = 0; + for (int i = 0; i < len; i++) { + num |= (VALUE)r_byte(arg) << (i * 16); + num |= (VALUE)r_byte(arg) << (i * 16 + 8); + } +#if SIZEOF_VALUE == SIZEOF_LONG + v = ULONG2NUM(num); +#else + v = ULL2NUM(num); +#endif + if (sign == '-') { + v = rb_int_uminus(v); + } + } + else { + data = r_bytes0(len * 2, arg); + v = rb_integer_unpack(RSTRING_PTR(data), len, 2, 0, + INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0)); + rb_str_resize(data, 0L); + } + v = r_entry(v, arg); + v = r_leave(v, arg, false); + } + break; case TYPE_STRING: - v = r_entry(r_string(arg), arg); - v = r_leave(v, arg); - break; + v = r_entry(r_string(arg), arg); + v = r_leave(v, arg, partial); + break; case TYPE_REGEXP: - { - VALUE str = r_bytes(arg); - int options = r_byte(arg); - int has_encoding = FALSE; - st_index_t idx = r_prepare(arg); - - if (ivp) { - r_ivar(str, &has_encoding, arg); - *ivp = FALSE; - } - if (!has_encoding) { - /* 1.8 compatibility; remove escapes undefined in 1.8 */ - char *ptr = RSTRING_PTR(str), *dst = ptr, *src = ptr; - long len = RSTRING_LEN(str); - long bs = 0; - for (; len-- > 0; *dst++ = *src++) { - switch (*src) { - case '\\': bs++; break; - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'o': case 'p': case 'q': case 'u': case 'y': - case 'E': case 'F': case 'H': case 'I': case 'J': case 'K': - case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y': - if (bs & 1) --dst; + { + VALUE str = r_bytes(arg); + int options = r_byte(arg); + int has_encoding = FALSE; + st_index_t idx = r_prepare(arg); + + if (ivp) { + r_ivar(str, &has_encoding, arg); + *ivp = FALSE; + } + if (!has_encoding) { + /* 1.8 compatibility; remove escapes undefined in 1.8 */ + char *ptr = RSTRING_PTR(str), *dst = ptr, *src = ptr; + long len = RSTRING_LEN(str); + long bs = 0; + for (; len-- > 0; *dst++ = *src++) { + switch (*src) { + case '\\': bs++; break; + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'o': case 'p': case 'q': case 'u': case 'y': + case 'E': case 'F': case 'H': case 'I': case 'J': case 'K': + case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y': + if (bs & 1) --dst; /* fall through */ - default: bs = 0; break; - } - } - rb_str_set_len(str, dst - ptr); - } - v = r_entry0(rb_reg_new_str(str, options), idx, arg); - v = r_leave(v, arg); - } - break; + default: bs = 0; break; + } + } + rb_str_set_len(str, dst - ptr); + } + VALUE regexp = rb_reg_new_str(str, options); + r_copy_ivar(regexp, str); + + v = r_entry0(regexp, idx, arg); + v = r_leave(v, arg, partial); + } + break; case TYPE_ARRAY: - { - long len = r_long(arg); - - v = rb_ary_new2(len); - v = r_entry(v, arg); - arg->readable += len - 1; - while (len--) { - rb_ary_push(v, r_object(arg)); - arg->readable--; - } - v = r_leave(v, arg); - arg->readable++; - } - break; + { + long len = r_long(arg); + + v = rb_ary_new2(len); + v = r_entry(v, arg); + arg->readable += len - 1; + while (len--) { + rb_ary_push(v, r_object(arg)); + arg->readable--; + } + v = r_leave(v, arg, partial); + arg->readable++; + } + break; case TYPE_HASH: case TYPE_HASH_DEF: - { - long len = r_long(arg); - - v = rb_hash_new_with_size(len); - v = r_entry(v, arg); - arg->readable += (len - 1) * 2; - while (len--) { - VALUE key = r_object(arg); - VALUE value = r_object(arg); - rb_hash_aset(v, key, value); - arg->readable -= 2; - } - arg->readable += 2; - if (type == TYPE_HASH_DEF) { - RHASH_SET_IFNONE(v, r_object(arg)); - } - v = r_leave(v, arg); - } - break; + type_hash: + { + long len = r_long(arg); + + v = hash_new_with_size(len); + v = r_entry(v, arg); + arg->readable += (len - 1) * 2; + while (len--) { + VALUE key = r_object(arg); + VALUE value = r_object(arg); + rb_hash_aset(v, key, value); + arg->readable -= 2; + } + arg->readable += 2; + if (type == TYPE_HASH_DEF) { + RHASH_SET_IFNONE(v, r_object(arg)); + } + v = r_leave(v, arg, partial); + } + break; case TYPE_STRUCT: - { - VALUE mem, values; - long i; - VALUE slot; - st_index_t idx = r_prepare(arg); - VALUE klass = path2class(r_unique(arg)); - long len = r_long(arg); + { + VALUE mem, values; + long i; + VALUE slot; + st_index_t idx = r_prepare(arg); + VALUE klass = path2class(r_unique(arg)); + long len = r_long(arg); v = rb_obj_alloc(klass); - if (!RB_TYPE_P(v, T_STRUCT)) { - rb_raise(rb_eTypeError, "class %"PRIsVALUE" not a struct", rb_class_name(klass)); - } - mem = rb_struct_s_members(klass); + if (!RB_TYPE_P(v, T_STRUCT)) { + rb_raise(rb_eTypeError, "class %"PRIsVALUE" not a struct", rb_class_name(klass)); + } + mem = rb_struct_s_members(klass); if (RARRAY_LEN(mem) != len) { rb_raise(rb_eTypeError, "struct %"PRIsVALUE" not compatible (struct size differs)", rb_class_name(klass)); } - arg->readable += (len - 1) * 2; - v = r_entry0(v, idx, arg); - values = rb_ary_new2(len); - { - VALUE keywords = Qfalse; - if (RTEST(rb_struct_s_keyword_init(klass))) { - keywords = rb_hash_new(); - rb_ary_push(values, keywords); - } - - for (i=0; i<len; i++) { - VALUE n = rb_sym2str(RARRAY_AREF(mem, i)); - slot = r_symbol(arg); - - if (!rb_str_equal(n, slot)) { - rb_raise(rb_eTypeError, "struct %"PRIsVALUE" not compatible (:%"PRIsVALUE" for :%"PRIsVALUE")", - rb_class_name(klass), - slot, n); - } - if (keywords) { - rb_hash_aset(keywords, RARRAY_AREF(mem, i), r_object(arg)); - } - else { - rb_ary_push(values, r_object(arg)); - } - arg->readable -= 2; - } - } + arg->readable += (len - 1) * 2; + v = r_entry0(v, idx, arg); + values = rb_ary_new2(len); + { + VALUE keywords = Qfalse; + if (RTEST(rb_struct_s_keyword_init(klass))) { + keywords = rb_hash_new(); + rb_ary_push(values, keywords); + } + + for (i=0; i<len; i++) { + VALUE n = rb_sym2str(RARRAY_AREF(mem, i)); + slot = r_symbol(arg); + + if (!rb_str_equal(n, slot)) { + rb_raise(rb_eTypeError, "struct %"PRIsVALUE" not compatible (:%"PRIsVALUE" for :%"PRIsVALUE")", + rb_class_name(klass), + slot, n); + } + if (keywords) { + rb_hash_aset(keywords, RARRAY_AREF(mem, i), r_object(arg)); + } + else { + rb_ary_push(values, r_object(arg)); + } + arg->readable -= 2; + } + } rb_struct_initialize(v, values); - v = r_leave(v, arg); - arg->readable += 2; - } - break; + v = r_leave(v, arg, partial); + arg->readable += 2; + } + break; case TYPE_USERDEF: { - VALUE name = r_unique(arg); - VALUE klass = path2class(name); - VALUE data; - st_data_t d; - - if (!rb_obj_respond_to(klass, s_load, TRUE)) { - rb_raise(rb_eTypeError, "class %"PRIsVALUE" needs to have method `_load'", - name); - } - data = r_string(arg); - if (ivp) { - r_ivar(data, NULL, arg); - *ivp = FALSE; - } - v = load_funcall(arg, klass, s_load, 1, &data); - v = r_entry(v, arg); - if (st_lookup(compat_allocator_tbl, (st_data_t)rb_get_alloc_func(klass), &d)) { - marshal_compat_t *compat = (marshal_compat_t*)d; - v = compat->loader(klass, v); - } - v = r_post_proc(v, arg); - } + VALUE name = r_unique(arg); + VALUE klass = path2class(name); + VALUE data; + st_data_t d; + + if (!rb_obj_respond_to(klass, s_load, TRUE)) { + rb_raise(rb_eTypeError, "class %"PRIsVALUE" needs to have method '_load'", + name); + } + data = r_string(arg); + if (ivp) { + r_ivar(data, NULL, arg); + *ivp = FALSE; + } + v = load_funcall(arg, klass, s_load, 1, &data); + v = r_entry(v, arg); + if (st_lookup(compat_allocator_tbl, (st_data_t)rb_get_alloc_func(klass), &d)) { + marshal_compat_t *compat = (marshal_compat_t*)d; + v = compat->loader(klass, v); + } + if (!partial) { + if (arg->freeze) { + OBJ_FREEZE(v); + } + v = r_post_proc(v, arg); + } + } break; case TYPE_USRMARSHAL: { - VALUE name = r_unique(arg); - VALUE klass = path2class(name); - VALUE oldclass = 0; - VALUE data; + VALUE name = r_unique(arg); + VALUE klass = path2class(name); + VALUE oldclass = 0; + VALUE data; - v = obj_alloc_by_klass(klass, arg, &oldclass); + v = obj_alloc_by_klass(klass, arg, &oldclass); if (!NIL_P(extmod)) { - /* for the case marshal_load is overridden */ - append_extmod(v, extmod); + /* for the case marshal_load is overridden */ + append_extmod(v, extmod); + } + if (!rb_obj_respond_to(v, s_mload, TRUE)) { + rb_raise(rb_eTypeError, "instance of %"PRIsVALUE" needs to have method 'marshal_load'", + name); + } + v = r_entry(v, arg); + data = r_object(arg); + load_funcall(arg, v, s_mload, 1, &data); + v = r_fixup_compat(v, arg); + v = r_copy_ivar(v, data); + if (arg->freeze) { + OBJ_FREEZE(v); } - if (!rb_obj_respond_to(v, s_mload, TRUE)) { - rb_raise(rb_eTypeError, "instance of %"PRIsVALUE" needs to have method `marshal_load'", - name); - } - v = r_entry(v, arg); - data = r_object(arg); - load_funcall(arg, v, s_mload, 1, &data); - v = r_fixup_compat(v, arg); - v = r_copy_ivar(v, data); - v = r_post_proc(v, arg); - if (!NIL_P(extmod)) { - if (oldclass) append_extmod(v, extmod); - rb_ary_clear(extmod); - } - } + v = r_post_proc(v, arg); + if (!NIL_P(extmod)) { + if (oldclass) append_extmod(v, extmod); + rb_ary_clear(extmod); + } + } break; case TYPE_OBJECT: - { - st_index_t idx = r_prepare(arg); + { + st_index_t idx = r_prepare(arg); v = obj_alloc_by_path(r_unique(arg), arg); - if (!RB_TYPE_P(v, T_OBJECT)) { - rb_raise(rb_eArgError, "dump format error"); - } - v = r_entry0(v, idx, arg); - r_ivar(v, NULL, arg); - v = r_leave(v, arg); - } - break; + if (!RB_TYPE_P(v, T_OBJECT)) { + rb_raise(rb_eArgError, "dump format error"); + } + v = r_entry0(v, idx, arg); + r_ivar(v, NULL, arg); + v = r_leave(v, arg, partial); + } + break; case TYPE_DATA: - { - VALUE name = r_unique(arg); - VALUE klass = path2class(name); - VALUE oldclass = 0; - VALUE r; - - v = obj_alloc_by_klass(klass, arg, &oldclass); - if (!RB_TYPE_P(v, T_DATA)) { - rb_raise(rb_eArgError, "dump format error"); - } - v = r_entry(v, arg); - if (!rb_obj_respond_to(v, s_load_data, TRUE)) { - rb_raise(rb_eTypeError, - "class %"PRIsVALUE" needs to have instance method `_load_data'", - name); - } - r = r_object0(arg, 0, extmod); - load_funcall(arg, v, s_load_data, 1, &r); - v = r_leave(v, arg); - } - break; + { + VALUE name = r_unique(arg); + VALUE klass = path2class(name); + VALUE oldclass = 0; + VALUE r; + + v = obj_alloc_by_klass(klass, arg, &oldclass); + if (!RB_TYPE_P(v, T_DATA)) { + rb_raise(rb_eArgError, "dump format error"); + } + v = r_entry(v, arg); + if (!rb_obj_respond_to(v, s_load_data, TRUE)) { + rb_raise(rb_eTypeError, + "class %"PRIsVALUE" needs to have instance method '_load_data'", + name); + } + r = r_object0(arg, partial, 0, extmod); + load_funcall(arg, v, s_load_data, 1, &r); + v = r_leave(v, arg, partial); + } + break; case TYPE_MODULE_OLD: { - VALUE str = r_bytes(arg); + VALUE str = r_bytes(arg); - v = rb_path_to_class(str); - prohibit_ivar("class/module", str); - v = r_entry(v, arg); - v = r_leave(v, arg); - } - break; + v = rb_path_to_class(str); + prohibit_ivar("class/module", str); + v = r_entry(v, arg); + v = r_leave(v, arg, partial); + } + break; case TYPE_CLASS: { - VALUE str = r_bytes(arg); + VALUE str = r_bytes(arg); - v = path2class(str); - prohibit_ivar("class", str); - v = r_entry(v, arg); - v = r_leave(v, arg); - } - break; + v = path2class(str); + prohibit_ivar("class", str); + v = r_entry(v, arg); + v = r_leave(v, arg, partial); + } + break; case TYPE_MODULE: { - VALUE str = r_bytes(arg); + VALUE str = r_bytes(arg); - v = path2module(str); - prohibit_ivar("module", str); - v = r_entry(v, arg); - v = r_leave(v, arg); - } - break; + v = path2module(str); + prohibit_ivar("module", str); + v = r_entry(v, arg); + v = r_leave(v, arg, partial); + } + break; case TYPE_SYMBOL: - if (ivp) { - v = r_symreal(arg, *ivp); - *ivp = FALSE; - } - else { - v = r_symreal(arg, 0); - } - v = rb_str_intern(v); - v = r_leave(v, arg); - break; + if (ivp) { + v = r_symreal(arg, *ivp); + *ivp = FALSE; + } + else { + v = r_symreal(arg, 0); + } + v = rb_str_intern(v); + v = r_leave(v, arg, partial); + break; case TYPE_SYMLINK: - v = rb_str_intern(r_symlink(arg)); - break; + v = rb_str_intern(r_symlink(arg)); + break; default: - rb_raise(rb_eArgError, "dump format error(0x%x)", type); - break; + rb_raise(rb_eArgError, "dump format error(0x%x)", type); + break; } - if (v == Qundef) { - rb_raise(rb_eArgError, "dump format error (bad link)"); + if (UNDEF_P(v)) { + rb_raise(rb_eArgError, "dump format error (bad link)"); } return v; @@ -2104,16 +2288,14 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) static VALUE r_object(struct load_arg *arg) { - return r_object0(arg, 0, Qnil); + return r_object0(arg, false, 0, Qnil); } static void clear_load_arg(struct load_arg *arg) { - if (arg->buf) { - xfree(arg->buf); - arg->buf = 0; - } + xfree(arg->buf); + arg->buf = NULL; arg->buflen = 0; arg->offset = 0; arg->readable = 0; @@ -2122,39 +2304,16 @@ clear_load_arg(struct load_arg *arg) arg->symbols = 0; st_free_table(arg->data); arg->data = 0; + st_free_table(arg->partial_objects); + arg->partial_objects = 0; if (arg->compat_tbl) { - st_free_table(arg->compat_tbl); - arg->compat_tbl = 0; + st_free_table(arg->compat_tbl); + arg->compat_tbl = 0; } } -/* - * call-seq: - * load( source [, proc] ) -> obj - * restore( source [, proc] ) -> obj - * - * Returns the result of converting the serialized data in source into a - * Ruby object (possibly with associated subordinate objects). source - * may be either an instance of IO or an object that responds to - * to_str. If proc is specified, each object will be passed to the proc, as the object - * is being deserialized. - * - * Never pass untrusted data (including user supplied input) to this method. - * Please see the overview for further details. - */ -static VALUE -marshal_load(int argc, VALUE *argv, VALUE _) -{ - VALUE port, proc; - - rb_check_arity(argc, 1, 2); - port = argv[0]; - proc = argc > 1 ? argv[1] : Qnil; - return rb_marshal_load_with_proc(port, proc); -} - VALUE -rb_marshal_load_with_proc(VALUE port, VALUE proc) +rb_marshal_load_with_proc(VALUE port, VALUE proc, bool freeze) { int major, minor; VALUE v; @@ -2163,40 +2322,42 @@ rb_marshal_load_with_proc(VALUE port, VALUE proc) v = rb_check_string_type(port); if (!NIL_P(v)) { - port = v; + port = v; } else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) { - rb_check_funcall(port, s_binmode, 0, 0); + rb_check_funcall(port, s_binmode, 0, 0); } else { - io_needed(); + io_needed(); } wrapper = TypedData_Make_Struct(0, struct load_arg, &load_arg_data, arg); arg->src = port; arg->offset = 0; arg->symbols = st_init_numtable(); arg->data = rb_init_identtable(); + arg->partial_objects = rb_init_identtable(); arg->compat_tbl = 0; arg->proc = 0; arg->readable = 0; + arg->freeze = freeze; if (NIL_P(v)) - arg->buf = xmalloc(BUFSIZ); + arg->buf = xmalloc(BUFSIZ); else - arg->buf = 0; + arg->buf = 0; major = r_byte(arg); minor = r_byte(arg); if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) { - clear_load_arg(arg); - rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\ + clear_load_arg(arg); + rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\ \tformat version %d.%d required; %d.%d given", - MARSHAL_MAJOR, MARSHAL_MINOR, major, minor); + MARSHAL_MAJOR, MARSHAL_MINOR, major, minor); } if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) { - rb_warn("incompatible marshal file format (can be read)\n\ + rb_warn("incompatible marshal file format (can be read)\n\ \tformat version %d.%d required; %d.%d given", - MARSHAL_MAJOR, MARSHAL_MINOR, major, minor); + MARSHAL_MAJOR, MARSHAL_MINOR, major, minor); } if (!NIL_P(proc)) arg->proc = proc; @@ -2207,6 +2368,14 @@ rb_marshal_load_with_proc(VALUE port, VALUE proc) return v; } +static VALUE +marshal_load(rb_execution_context_t *ec, VALUE mod, VALUE source, VALUE proc, VALUE freeze) +{ + return rb_marshal_load_with_proc(source, proc, RTEST(freeze)); +} + +#include "marshal.rbinc" + /* * The marshaling library converts collections of Ruby objects into a * byte stream, allowing them to be stored outside the currently @@ -2339,8 +2508,6 @@ Init_marshal(void) set_id(s_ruby2_keywords_flag); rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1); - rb_define_module_function(rb_mMarshal, "load", marshal_load, -1); - rb_define_module_function(rb_mMarshal, "restore", marshal_load, -1); /* major version */ rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR)); @@ -2348,6 +2515,20 @@ Init_marshal(void) rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR)); } +static int +free_compat_i(st_data_t key, st_data_t value, st_data_t _) +{ + xfree((marshal_compat_t *)value); + return ST_CONTINUE; +} + +static void +free_compat_allocator_table(void *data) +{ + st_foreach(data, free_compat_i, 0); + st_free_table(data); +} + static st_table * compat_allocator_table(void) { @@ -2356,8 +2537,8 @@ compat_allocator_table(void) #undef RUBY_UNTYPED_DATA_WARNING #define RUBY_UNTYPED_DATA_WARNING 0 compat_allocator_tbl_wrapper = - Data_Wrap_Struct(0, mark_marshal_compat_t, 0, compat_allocator_tbl); - rb_gc_register_mark_object(compat_allocator_tbl_wrapper); + Data_Wrap_Struct(0, mark_marshal_compat_t, free_compat_allocator_table, compat_allocator_tbl); + rb_vm_register_global_object(compat_allocator_tbl_wrapper); return compat_allocator_tbl; } @@ -2370,5 +2551,5 @@ rb_marshal_dump(VALUE obj, VALUE port) VALUE rb_marshal_load(VALUE port) { - return rb_marshal_load_with_proc(port, Qnil); + return rb_marshal_load_with_proc(port, Qnil, false); } |