summary | refs | log | tree | commit | diff
path: root/marshal.c
diff options
context:
space:
mode:
Diffstat (limited to 'marshal.c')
-rw-r--r-- marshal.c 239
1 files changed, 143 insertions, 96 deletions
diff --git a/marshal.c b/marshal.c
index c1b19a6e63..e6ee3b47b0 100644
--- a/marshal.c
+++ b/marshal.c
@@ -30,6 +30,7 @@
#include "internal/hash.h"
#include "internal/numeric.h"
#include "internal/object.h"
+#include "internal/re.h"
#include "internal/struct.h"
#include "internal/symbol.h"
#include "internal/util.h"
@@ -40,6 +41,7 @@
#include "ruby/util.h"
#include "builtin.h"
#include "shape.h"
+#include "ruby/internal/attr/nonstring.h"
#define BITSPERSHORT (2*CHAR_BIT)
#define SHORTMASK ((1<<BITSPERSHORT)-1)
@@ -144,12 +146,14 @@ rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE),
compat_allocator_table();
compat = ALLOC(marshal_compat_t);
- RB_OBJ_WRITE(compat_allocator_tbl_wrapper, &compat->newclass, newclass);
- RB_OBJ_WRITE(compat_allocator_tbl_wrapper, &compat->oldclass, oldclass);
+ compat->newclass = newclass;
+ compat->oldclass = oldclass;
compat->dumper = dumper;
compat->loader = loader;
st_insert(compat_allocator_table(), (st_data_t)allocator, (st_data_t)compat);
+ RB_OBJ_WRITTEN(compat_allocator_tbl_wrapper, Qundef, newclass);
+ RB_OBJ_WRITTEN(compat_allocator_tbl_wrapper, Qundef, oldclass);
}
struct dump_arg {
@@ -459,6 +463,31 @@ w_float(double d, struct dump_arg *arg)
}
}
+
+static VALUE
+w_encivar(VALUE str, struct dump_arg *arg)
+{
+ VALUE encname = encoding_name(str, arg);
+ if (NIL_P(encname) ||
+ is_ascii_string(str)) {
+ return Qnil;
+ }
+ w_byte(TYPE_IVAR, arg);
+ return encname;
+}
+
+static void
+w_encname(VALUE encname, struct dump_arg *arg)
+{
+ if (!NIL_P(encname)) {
+ struct dump_call_arg c_arg;
+ c_arg.limit = 1;
+ c_arg.arg = arg;
+ w_long(1L, arg);
+ w_encoding(encname, &c_arg);
+ }
+}
+
static void
w_symbol(VALUE sym, struct dump_arg *arg)
{
@@ -475,24 +504,11 @@ w_symbol(VALUE sym, struct dump_arg *arg)
if (!sym) {
rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, sym);
}
- encname = encoding_name(sym, arg);
- if (NIL_P(encname) ||
- is_ascii_string(sym)) {
- encname = Qnil;
- }
- else {
- w_byte(TYPE_IVAR, arg);
- }
+ encname = w_encivar(sym, arg);
w_byte(TYPE_SYMBOL, arg);
w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg);
st_add_direct(arg->symbols, orig_sym, arg->symbols->num_entries);
- if (!NIL_P(encname)) {
- struct dump_call_arg c_arg;
- c_arg.limit = 1;
- c_arg.arg = arg;
- w_long(1L, arg);
- w_encoding(encname, &c_arg);
- }
+ w_encname(encname, arg);
}
}
@@ -530,7 +546,7 @@ w_extended(VALUE klass, struct dump_arg *arg, int check)
klass = RCLASS_SUPER(klass);
}
while (BUILTIN_TYPE(klass) == T_ICLASS) {
- if (!FL_TEST(klass, RICLASS_IS_ORIGIN) ||
+ if (!RICLASS_IS_ORIGIN_P(klass) ||
BUILTIN_TYPE(RBASIC(klass)->klass) != T_MODULE) {
VALUE path = rb_class_name(RBASIC(klass)->klass);
w_byte(TYPE_EXTENDED, arg);
@@ -712,28 +728,9 @@ has_ivars(VALUE obj, VALUE encname, VALUE *ivobj)
static void
w_ivar_each(VALUE obj, st_index_t num, struct dump_call_arg *arg)
{
- shape_id_t shape_id = rb_shape_get_shape_id(arg->obj);
struct w_ivar_arg ivarg = {arg, num};
if (!num) return;
- rb_ivar_foreach(obj, w_obj_each, (st_data_t)&ivarg);
-
- if (shape_id != rb_shape_get_shape_id(arg->obj)) {
- rb_shape_t * expected_shape = rb_shape_get_shape_by_id(shape_id);
- rb_shape_t * actual_shape = rb_shape_get_shape(arg->obj);
-
- // If the shape tree got _shorter_ then we probably removed an IV
- // If the shape tree got longer, then we probably added an IV.
- // The exception message might not be accurate when someone adds and
- // removes the same number of IVs, but they will still get an exception
- if (rb_shape_depth(expected_shape) > rb_shape_depth(actual_shape)) {
- rb_raise(rb_eRuntimeError, "instance variable removed from %"PRIsVALUE" instance",
- CLASS_OF(arg->obj));
- }
- else {
- rb_raise(rb_eRuntimeError, "instance variable added to %"PRIsVALUE" instance",
- CLASS_OF(arg->obj));
- }
- }
+ rb_ivar_foreach_buffered(obj, w_obj_each, (st_data_t)&ivarg);
}
static void
@@ -933,8 +930,9 @@ w_object(VALUE obj, struct dump_arg *arg, int limit)
hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj);
{
st_data_t compat_data;
- rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass);
- if (st_lookup(compat_allocator_tbl,
+ VALUE klass = CLASS_OF(obj);
+ rb_alloc_func_t allocator = RCLASS_SINGLETON_P(klass) ? 0 : rb_get_alloc_func(klass);
+ if (allocator && st_lookup(compat_allocator_tbl,
(st_data_t)allocator,
&compat_data)) {
marshal_compat_t *compat = (marshal_compat_t*)compat_data;
@@ -954,19 +952,23 @@ w_object(VALUE obj, struct dump_arg *arg, int limit)
if (FL_TEST(obj, FL_SINGLETON)) {
rb_raise(rb_eTypeError, "singleton class can't be dumped");
}
- w_byte(TYPE_CLASS, arg);
{
VALUE path = class2path(obj);
+ VALUE encname = w_encivar(path, arg);
+ w_byte(TYPE_CLASS, arg);
w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
+ w_encname(encname, arg);
RB_GC_GUARD(path);
}
break;
case T_MODULE:
- w_byte(TYPE_MODULE, arg);
{
VALUE path = class2path(obj);
+ VALUE encname = w_encivar(path, arg);
+ w_byte(TYPE_MODULE, arg);
w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
+ w_encname(encname, arg);
RB_GC_GUARD(path);
}
break;
@@ -1067,7 +1069,7 @@ w_object(VALUE obj, struct dump_arg *arg, int limit)
case T_STRUCT:
w_class(TYPE_STRUCT, obj, arg, TRUE);
{
- long len = RSTRUCT_LEN(obj);
+ long len = RSTRUCT_LEN_RAW(obj);
VALUE mem;
long i;
@@ -1075,7 +1077,7 @@ w_object(VALUE obj, struct dump_arg *arg, int limit)
mem = rb_struct_members(obj);
for (i=0; i<len; i++) {
w_symbol(RARRAY_AREF(mem, i), arg);
- w_object(RSTRUCT_GET(obj, i), arg, limit);
+ w_object(RSTRUCT_GET_RAW(obj, i), arg, limit);
}
}
break;
@@ -1172,7 +1174,7 @@ io_needed(void)
* * anonymous Class/Module.
* * objects which are related to system (ex: Dir, File::Stat, IO, File, Socket
* and so on)
- * * an instance of MatchData, Data, Method, UnboundMethod, Proc, Thread,
+ * * an instance of MatchData, Method, UnboundMethod, Proc, Thread,
* ThreadGroup, Continuation
* * objects which define singleton methods
*/
@@ -1240,6 +1242,7 @@ rb_marshal_dump_limited(VALUE obj, VALUE port, int limit)
struct load_arg {
VALUE src;
char *buf;
+ long bufsize;
long buflen;
long readable;
long offset;
@@ -1325,15 +1328,23 @@ static unsigned char
r_byte1_buffered(struct load_arg *arg)
{
if (arg->buflen == 0) {
- long readable = arg->readable < BUFSIZ ? arg->readable : BUFSIZ;
+ long readable = arg->readable < arg->bufsize ? arg->readable : arg->bufsize;
+ long read_len;
VALUE str, n = LONG2NUM(readable);
str = load_funcall(arg, arg->src, s_read, 1, &n);
if (NIL_P(str)) too_short();
StringValue(str);
- memcpy(arg->buf, RSTRING_PTR(str), RSTRING_LEN(str));
+ read_len = RSTRING_LEN(str);
+ if (UNLIKELY(read_len < readable)) too_short();
+ if (UNLIKELY(read_len > arg->bufsize)) {
+ arg->buf = ruby_sized_realloc_n(arg->buf, read_len, 1, arg->bufsize);
+ arg->bufsize = read_len;
+ }
+ memcpy(arg->buf, RSTRING_PTR(str), read_len);
arg->offset = 0;
- arg->buflen = RSTRING_LEN(str);
+ arg->buflen = read_len;
+ RB_GC_GUARD(str);
}
arg->buflen--;
return arg->buf[arg->offset++];
@@ -1411,7 +1422,7 @@ long
ruby_marshal_read_long(const char **buf, long len)
{
long x;
- struct RString src;
+ struct RString src = {RBASIC_INIT};
struct load_arg arg;
memset(&arg, 0, sizeof(arg));
arg.src = rb_setup_fake_str(&src, *buf, len, 0);
@@ -1420,6 +1431,18 @@ ruby_marshal_read_long(const char **buf, long len)
return x;
}
+static long
+r_keep_readable(struct load_arg *arg, long len, size_t size)
+{
+ if (UNLIKELY(len < 0)) {
+ rb_raise(rb_eArgError, "negative length");
+ }
+ if (UNLIKELY((unsigned long)len > SIZE_MAX / size || arg->readable >= LONG_MAX - len)) {
+ rb_raise(rb_eArgError, "marshaled data too big");
+ }
+ return len;
+}
+
static VALUE
r_bytes1(long len, struct load_arg *arg)
{
@@ -1449,7 +1472,7 @@ r_bytes1_buffered(long len, struct load_arg *arg)
long tmp_len, read_len, need_len = len - buflen;
VALUE tmp, n;
- readable = readable < BUFSIZ ? readable : BUFSIZ;
+ readable = readable < arg->bufsize ? readable : arg->bufsize;
read_len = need_len > readable ? need_len : readable;
n = LONG2NUM(read_len);
tmp = load_funcall(arg, arg->src, s_read, 1, &n);
@@ -1515,7 +1538,7 @@ name_equal(const char *name, size_t nlen, const char *p, long l)
static int
sym2encidx(VALUE sym, VALUE val)
{
- static const char name_encoding[8] = "encoding";
+ RBIMPL_ATTR_NONSTRING() static const char name_encoding[8] = "encoding";
const char *p;
long l;
if (rb_enc_get_index(sym) != ENCINDEX_US_ASCII) return -1;
@@ -1708,6 +1731,37 @@ r_copy_ivar(VALUE v, VALUE data)
"can't override instance variable of "type" '%"PRIsVALUE"'", \
(str))
+static int
+r_ivar_encoding(VALUE obj, struct load_arg *arg, VALUE sym, VALUE val)
+{
+ int idx = sym2encidx(sym, val);
+ if (idx >= 0) {
+ if (rb_enc_capable(obj)) {
+ // Check if needed to avoid rb_check_frozen() check for Regexps
+ if (rb_enc_get_index(obj) != idx) {
+ rb_enc_associate_index(obj, idx);
+ }
+ }
+ else {
+ rb_raise(rb_eArgError, "%"PRIsVALUE" is not enc_capable", obj);
+ }
+ return TRUE;
+ }
+ return FALSE;
+}
+
+static long
+r_encname(VALUE obj, struct load_arg *arg)
+{
+ long len = r_long(arg);
+ if (len > 0) {
+ VALUE sym = r_symbol(arg);
+ VALUE val = r_object(arg);
+ len -= r_ivar_encoding(obj, arg, sym, val);
+ }
+ return len;
+}
+
static void
r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
{
@@ -1724,14 +1778,7 @@ r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
do {
VALUE sym = r_symbol(arg);
VALUE val = r_object(arg);
- int idx = sym2encidx(sym, val);
- if (idx >= 0) {
- if (rb_enc_capable(obj)) {
- rb_enc_associate_index(obj, idx);
- }
- else {
- rb_raise(rb_eArgError, "%"PRIsVALUE" is not enc_capable", obj);
- }
+ if (r_ivar_encoding(obj, arg, sym, val)) {
if (has_encoding) *has_encoding = TRUE;
}
else if (symname_equal_lit(sym, name_s_ruby2_keywords_flag)) {
@@ -1816,17 +1863,17 @@ append_extmod(VALUE obj, VALUE extmod)
override_ivar_error(type, str); \
} while (0)
-static VALUE r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int type);
+static VALUE r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE klass, VALUE extmod, int type);
static VALUE
r_object0(struct load_arg *arg, bool partial, int *ivp, VALUE extmod)
{
int type = r_byte(arg);
- return r_object_for(arg, partial, ivp, extmod, type);
+ return r_object_for(arg, partial, ivp, 0, extmod, type);
}
static VALUE
-r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int type)
+r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE klass, VALUE extmod, int type)
{
VALUE (*hash_new_with_size)(st_index_t) = rb_hash_new_with_size;
VALUE v = Qnil;
@@ -1841,6 +1888,9 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ
}
v = (VALUE)link;
if (!st_lookup(arg->partial_objects, (st_data_t)v, &link)) {
+ if (arg->freeze && RB_TYPE_P(v, T_STRING)) {
+ v = rb_str_to_interned_str(v);
+ }
v = r_post_proc(v, arg);
}
break;
@@ -1899,12 +1949,12 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ
}
type = r_byte(arg);
if ((c == rb_cHash) &&
- /* Hack for compare_by_identify */
+ /* Hack for compare_by_identity */
(type == TYPE_HASH || type == TYPE_HASH_DEF)) {
hash_new_with_size = rb_ident_hash_new_with_size;
goto type_hash;
}
- v = r_object_for(arg, partial, 0, extmod, type);
+ v = r_object_for(arg, partial, 0, c, extmod, type);
if (RB_SPECIAL_CONST_P(v) || RB_TYPE_P(v, T_OBJECT) || RB_TYPE_P(v, T_CLASS)) {
goto format_error;
}
@@ -1976,7 +2026,10 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ
int sign;
sign = r_byte(arg);
- len = r_long(arg);
+ if (sign != '+' && sign != '-') {
+ rb_raise(rb_eArgError, "invalid Bignum sign");
+ }
+ len = r_keep_readable(arg, r_long(arg), 2);
if (SIZEOF_VALUE >= 8 && len <= 4) {
// Representable within uintptr, likely FIXNUM
@@ -2041,7 +2094,10 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ
}
rb_str_set_len(str, dst - ptr);
}
- VALUE regexp = rb_reg_new_str(str, options);
+ if (!klass) {
+ klass = rb_cRegexp;
+ }
+ VALUE regexp = rb_reg_init_str(rb_reg_s_alloc(klass), str, options);
r_copy_ivar(regexp, str);
v = r_entry0(regexp, idx, arg);
@@ -2051,7 +2107,7 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ
case TYPE_ARRAY:
{
- long len = r_long(arg);
+ long len = r_keep_readable(arg, r_long(arg), 1);
v = rb_ary_new2(len);
v = r_entry(v, arg);
@@ -2069,7 +2125,7 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ
case TYPE_HASH_DEF:
type_hash:
{
- long len = r_long(arg);
+ long len = r_keep_readable(arg, r_long(arg), 2);
v = hash_new_with_size(len);
v = r_entry(v, arg);
@@ -2095,7 +2151,7 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ
VALUE slot;
st_index_t idx = r_prepare(arg);
VALUE klass = path2class(r_unique(arg));
- long len = r_long(arg);
+ long len = r_keep_readable(arg, r_long(arg), 2);
v = rb_obj_alloc(klass);
if (!RB_TYPE_P(v, T_STRUCT)) {
@@ -2255,6 +2311,7 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ
{
VALUE str = r_bytes(arg);
+ if (ivp && *ivp > 0) *ivp = r_encname(str, arg) > 0;
v = path2class(str);
prohibit_ivar("class", str);
v = r_entry(v, arg);
@@ -2266,6 +2323,7 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ
{
VALUE str = r_bytes(arg);
+ if (ivp && *ivp > 0) *ivp = r_encname(str, arg) > 0;
v = path2module(str);
prohibit_ivar("module", str);
v = r_entry(v, arg);
@@ -2310,8 +2368,9 @@ r_object(struct load_arg *arg)
static void
clear_load_arg(struct load_arg *arg)
{
- xfree(arg->buf);
+ ruby_xfree_sized(arg->buf, arg->bufsize);
arg->buf = NULL;
+ arg->bufsize = 0;
arg->buflen = 0;
arg->offset = 0;
arg->readable = 0;
@@ -2357,10 +2416,14 @@ rb_marshal_load_with_proc(VALUE port, VALUE proc, bool freeze)
arg->readable = 0;
arg->freeze = freeze;
- if (NIL_P(v))
+ if (NIL_P(v)) {
+ arg->bufsize = BUFSIZ;
arg->buf = xmalloc(BUFSIZ);
- else
+ }
+ else {
+ arg->bufsize = 0;
arg->buf = 0;
+ }
major = r_byte(arg);
minor = r_byte(arg);
@@ -2532,25 +2595,25 @@ Init_marshal(void)
}
static int
-marshal_compat_table_mark_i(st_data_t key, st_data_t value, st_data_t _)
+marshal_compat_table_mark_and_move_i(st_data_t key, st_data_t value, st_data_t _)
{
marshal_compat_t *p = (marshal_compat_t *)value;
- rb_gc_mark_movable(p->newclass);
- rb_gc_mark_movable(p->oldclass);
+ rb_gc_mark_and_move(&p->newclass);
+ rb_gc_mark_and_move(&p->oldclass);
return ST_CONTINUE;
}
static void
-marshal_compat_table_mark(void *tbl)
+marshal_compat_table_mark_and_move(void *tbl)
{
if (!tbl) return;
- st_foreach(tbl, marshal_compat_table_mark_i, 0);
+ st_foreach(tbl, marshal_compat_table_mark_and_move_i, 0);
}
static int
marshal_compat_table_free_i(st_data_t key, st_data_t value, st_data_t _)
{
- xfree((marshal_compat_t *)value);
+ SIZED_FREE((marshal_compat_t *)value);
return ST_CONTINUE;
}
@@ -2567,29 +2630,13 @@ marshal_compat_table_memsize(const void *data)
return st_memsize(data) + sizeof(marshal_compat_t) * st_table_size(data);
}
-static int
-marshal_compat_table_compact_i(st_data_t key, st_data_t value, st_data_t _)
-{
- marshal_compat_t *p = (marshal_compat_t *)value;
- p->newclass = rb_gc_location(p->newclass);
- p->oldclass = rb_gc_location(p->oldclass);
- return ST_CONTINUE;
-}
-
-static void
-marshal_compat_table_compact(void *tbl)
-{
- if (!tbl) return;
- st_foreach(tbl, marshal_compat_table_compact_i, 0);
-}
-
static const rb_data_type_t marshal_compat_type = {
.wrap_struct_name = "marshal_compat_table",
.function = {
- .dmark = marshal_compat_table_mark,
+ .dmark = marshal_compat_table_mark_and_move,
.dfree = marshal_compat_table_free,
.dsize = marshal_compat_table_memsize,
- .dcompact = marshal_compat_table_compact,
+ .dcompact = marshal_compat_table_mark_and_move,
},
.flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY,
};