summaryrefslogtreecommitdiff
path: root/marshal.c
diff options
context:
space:
mode:
Diffstat (limited to 'marshal.c')
-rw-r--r--marshal.c868
1 files changed, 868 insertions, 0 deletions
diff --git a/marshal.c b/marshal.c
new file mode 100644
index 0000000000..ac95e438ae
--- /dev/null
+++ b/marshal.c
@@ -0,0 +1,868 @@
+/************************************************
+
+ marshal.c -
+
+ $Author$
+ $Revision$
+ $Date$
+ created at: Thu Apr 27 16:30:01 JST 1995
+
+************************************************/
+
+#include "ruby.h"
+#include "io.h"
+#include "st.h"
+
+/* Dump format version.  marshal_dump() writes these two bytes at the start
+ * of every dump; marshal_load() refuses a different major number and only
+ * warns about a different minor number. */
+#define MARSHAL_MAJOR 4
+#define MARSHAL_MINOR 0
+
+/* One-byte tags that introduce each serialized value. */
+
+/* Values written inline (never entered in the object index table). */
+#define TYPE_NIL '0'
+#define TYPE_TRUE 'T'
+#define TYPE_FALSE 'F'
+#define TYPE_FIXNUM 'i'
+
+/* TYPE_UCLASS is a prefix: it is followed by a class name and then by the
+ * object itself (see w_uclass()).  The remaining tags name the kind of
+ * object that follows. */
+#define TYPE_UCLASS 'C'
+#define TYPE_OBJECT 'o'
+#define TYPE_USERDEF 'u'
+#define TYPE_FLOAT 'f'
+#define TYPE_BIGNUM 'l'
+#define TYPE_STRING '"'
+#define TYPE_REGEXP '/'
+#define TYPE_ARRAY '['
+#define TYPE_HASH '{'
+#define TYPE_STRUCT 'S'
+#define TYPE_MODULE 'M'
+
+/* TYPE_SYMBOL is followed by the symbol's name; TYPE_SYMLINK is followed by
+ * the index of a symbol that was already written (see w_symbol()). */
+#define TYPE_SYMBOL ':'
+#define TYPE_SYMLINK ';'
+
+/* Followed by the index of an object that was already written. */
+#define TYPE_LINK '@'
+
+/* Built-in classes, defined outside this file. */
+extern VALUE cString;
+extern VALUE cRegexp;
+extern VALUE cArray;
+extern VALUE cHash;
+
+/* Defined outside this file; called here with a class name/path read from
+ * the dump. */
+VALUE rb_path2class();
+
+/* IDs of the `_dump_to' and `_load_from' hook methods; set in
+ * Init_marshal(). */
+static ID s_dump, s_load;
+
+/* State shared by all w_* functions during one Marshal.dump call. */
+struct dump_arg {
+ /* not assigned or read by any code visible in this file */
+ VALUE obj;
+ /* output stream, or 0 when the dump is collected in `str' */
+ FILE *fp;
+ /* output string; only used while `fp' is 0 */
+ VALUE str;
+ /* maps each symbol ID already written to its index (for TYPE_SYMLINK) */
+ st_table *symbol;
+ /* maps each object already written to its index (for TYPE_LINK) */
+ st_table *data;
+};
+
+/* Argument bundle for callbacks that take a single pointer: dump() (called
+ * through rb_ensure) and the st_foreach iterators hash_each()/obj_each(). */
+struct dump_call_arg {
+ /* object to dump; set by marshal_dump() and read by dump() */
+ VALUE obj;
+ /* shared dump state */
+ struct dump_arg *arg;
+ /* depth limit handed on to w_object() */
+ int limit;
+};
+
+/* Forward declaration: w_bytes() calls w_long() before its definition. */
+static void w_long _((long, struct dump_arg*));
+
+/*
+ * Emit a single byte: to the stdio stream when one is set, otherwise
+ * appended to the output string.
+ */
+static void
+w_byte(c, arg)
+    char c;
+    struct dump_arg *arg;
+{
+    if (!arg->fp) {
+        str_cat(arg->str, (UCHAR*)&c, 1);
+        return;
+    }
+    putc(c, arg->fp);
+}
+
+/*
+ * Emit a length-prefixed byte sequence: the count n encoded by w_long(),
+ * followed by the n bytes starting at s.
+ */
+static void
+w_bytes(s, n, arg)
+    char *s;
+    int n;
+    struct dump_arg *arg;
+{
+    w_long(n, arg);
+    if (arg->fp == 0) {
+        str_cat(arg->str, s, n);
+    }
+    else {
+        fwrite(s, 1, n, arg->fp);
+    }
+}
+
+/*
+ * Emit the low sizeof(USHORT) bytes of x, least significant byte first.
+ */
+static void
+w_short(x, arg)
+    int x;
+    struct dump_arg *arg;
+{
+    int shift;
+
+    for (shift = 0; shift < sizeof(USHORT)*8; shift += 8) {
+        w_byte((x >> shift) & 0xff, arg);
+    }
+}
+
+/*
+ * Emit x in the variable-length integer encoding.
+ *
+ * Zero is a single 0 byte.  Any other value is a signed count byte n
+ * followed by n value bytes, least significant first: bytes are peeled off
+ * until what remains is 0 (count written as +n) or -1 (count written as
+ * -n).
+ */
+static void
+w_long(x, arg)
+    long x;
+    struct dump_arg *arg;
+{
+    char buf[sizeof(long)+1];   /* buf[0] is the count byte */
+    int n, k;
+
+    if (x == 0) {
+        w_byte(0, arg);
+        return;
+    }
+
+    n = 1;
+    while (n < sizeof(long)+1) {
+        buf[n] = x & 0xff;
+        x = RSHIFT(x,8);
+        if (x == 0) {
+            buf[0] = n;
+            break;
+        }
+        if (x == -1) {
+            buf[0] = -n;
+            break;
+        }
+        n++;
+    }
+
+    for (k = 0; k <= n; k++) {
+        w_byte(buf[k], arg);
+    }
+}
+
+/*
+ * Emit d as length-prefixed text, formatted with "%.12g".
+ */
+static void
+w_float(d, arg)
+    double d;
+    struct dump_arg *arg;
+{
+    char text[100];
+    int len;
+
+    sprintf(text, "%.12g", d);
+    len = strlen(text);
+    w_bytes(text, len, arg);
+}
+
+/*
+ * Emit a symbol.  The first time an ID is seen it is written as
+ * TYPE_SYMBOL plus its name and entered in arg->symbol under the next free
+ * index; later occurrences are written as TYPE_SYMLINK plus that index.
+ */
+static void
+w_symbol(id, arg)
+    ID id;
+    struct dump_arg *arg;
+{
+    char *name = rb_id2name(id);
+    int index;
+
+    if (!st_lookup(arg->symbol, id, &index)) {
+        w_byte(TYPE_SYMBOL, arg);
+        w_bytes(name, strlen(name), arg);
+        st_insert(arg->symbol, id, arg->symbol->num_entries);
+        return;
+    }
+
+    w_byte(TYPE_SYMLINK, arg);
+    w_long(index, arg);
+}
+
+/*
+ * Emit the C string s as a symbol, so that repeated names (class names in
+ * particular) share one entry in the symbol table.
+ */
+static void
+w_unique(s, arg)
+    char *s;
+    struct dump_arg *arg;
+{
+    ID id = rb_intern(s);
+
+    w_symbol(id, arg);
+}
+
+/* Forward declaration: hash_each() and obj_each() call w_object() before
+ * its definition. */
+static void w_object _((VALUE,struct dump_arg*,int));
+/* Built-in classes, defined outside this file. */
+extern VALUE cIO, cBignum, cStruct;
+
+/*
+ * st_foreach callback for hash tables: dump one key followed by its value.
+ * Always returns ST_CONTINUE.
+ */
+static int
+hash_each(key, value, arg)
+    VALUE key, value;
+    struct dump_call_arg *arg;
+{
+    struct dump_arg *out = arg->arg;
+    int depth = arg->limit;
+
+    w_object(key, out, depth);
+    w_object(value, out, depth);
+    return ST_CONTINUE;
+}
+
+/*
+ * st_foreach callback for instance-variable tables: dump the variable's
+ * name as a symbol, then its value.  Always returns ST_CONTINUE.
+ */
+static int
+obj_each(id, value, arg)
+    ID id;
+    VALUE value;
+    struct dump_call_arg *arg;
+{
+    struct dump_arg *out = arg->arg;
+
+    w_symbol(id, out);
+    w_object(value, out, arg->limit);
+    return ST_CONTINUE;
+}
+
+/*
+ * If obj's class is not the expected built-in `class', emit a TYPE_UCLASS
+ * prefix carrying the actual class name.  Emits nothing otherwise.
+ */
+static void
+w_uclass(obj, class, arg)
+    VALUE obj, class;
+    struct dump_arg *arg;
+{
+    VALUE actual = CLASS_OF(obj);
+
+    if (actual == class) return;
+
+    w_byte(TYPE_UCLASS, arg);
+    w_unique(rb_class2name(actual), arg);
+}
+
+/*
+ * Write the serialized form of obj, recursing into the values it contains.
+ *
+ *  obj   - value to dump
+ *  arg   - output destination plus the symbol/object index tables
+ *  limit - remaining nesting depth.  Fail() is called when it is 0 on
+ *          entry; it is decremented once per level, so the -1 that
+ *          marshal_dump() uses by default never reaches 0 in practice.
+ *
+ * nil, true, false and Fixnums are written inline.  Every other object is
+ * first looked up in arg->data: a repeat is written as TYPE_LINK plus its
+ * index, a new object is given the next index and then written by type.
+ * An object that responds to `_dump_to' is written as TYPE_USERDEF, its
+ * class name, and the String that method returns; TypeError is raised if
+ * the method returns anything else, and for singleton objects and
+ * unsupported built-in types.
+ */
+static void
+w_object(obj, arg, limit)
+    VALUE obj;
+    struct dump_arg *arg;
+    int limit;
+{
+    struct dump_call_arg c_arg;   /* handed to the st_foreach callbacks */
+
+    if (limit == 0) {
+        Fail("exceed depth limit");
+    }
+    limit--;
+    c_arg.limit = limit;
+    c_arg.arg = arg;
+
+    if (obj == Qnil) {
+        w_byte(TYPE_NIL, arg);
+    }
+    else if (obj == TRUE) {
+        w_byte(TYPE_TRUE, arg);
+    }
+    else if (obj == FALSE) {
+        w_byte(TYPE_FALSE, arg);
+    }
+    else if (FIXNUM_P(obj)) {
+#if SIZEOF_LONG <= 4
+        w_byte(TYPE_FIXNUM, arg);
+        w_long(FIX2INT(obj), arg);
+#else
+        if (RSHIFT(obj, 32) == 0 || RSHIFT(obj, 32) == -1) {
+            w_byte(TYPE_FIXNUM, arg);
+            w_long(FIX2INT(obj), arg);
+        }
+        else {
+            /* too wide for the Fixnum encoding: write it as a Bignum */
+            obj = int2big(FIX2INT(obj));
+            goto write_bignum;
+        }
+#endif
+    }
+    else {
+        int num;
+
+        if (st_lookup(arg->data, obj, &num)) {
+            w_byte(TYPE_LINK, arg);
+            w_long(num, arg);
+            return;
+        }
+
+        /* number the object before writing its contents, so that
+           references from inside it resolve to this index */
+        st_insert(arg->data, obj, arg->data->num_entries);
+        if (rb_respond_to(obj, s_dump)) {
+            VALUE v;
+
+            w_byte(TYPE_USERDEF, arg);
+            w_unique(rb_class2name(CLASS_OF(obj)), arg);
+            /* rb_funcall takes VALUE arguments: box the C int */
+            v = rb_funcall(obj, s_dump, 1, INT2FIX(limit));
+            if (TYPE(v) != T_STRING) {
+                TypeError("_dump_to must return String");
+            }
+            w_bytes(RSTRING(v)->ptr, RSTRING(v)->len, arg);
+            return;
+        }
+
+        switch (BUILTIN_TYPE(obj)) {
+          case T_MODULE:
+          case T_CLASS:
+            w_byte(TYPE_MODULE, arg);
+            {
+                VALUE path = rb_class_path(obj);
+                w_bytes(RSTRING(path)->ptr, RSTRING(path)->len, arg);
+            }
+            return;
+
+          case T_FLOAT:
+            w_byte(TYPE_FLOAT, arg);
+            w_float(RFLOAT(obj)->value, arg);
+            return;
+
+          case T_BIGNUM:
+          write_bignum:
+            w_byte(TYPE_BIGNUM, arg);
+            {
+                /* sign byte, digit count, then each digit via w_short() */
+                char sign = RBIGNUM(obj)->sign?'+':'-';
+                int len = RBIGNUM(obj)->len;
+                USHORT *d = RBIGNUM(obj)->digits;
+
+                w_byte(sign, arg);
+                w_long(len, arg);
+                while (len--) {
+                    w_short(*d, arg);
+                    d++;
+                }
+            }
+            return;
+
+          case T_STRING:
+            w_uclass(obj, cString, arg);
+            w_byte(TYPE_STRING, arg);
+            w_bytes(RSTRING(obj)->ptr, RSTRING(obj)->len, arg);
+            return;
+
+          case T_REGEXP:
+            w_uclass(obj, cRegexp, arg);
+            w_byte(TYPE_REGEXP, arg);
+            w_bytes(RREGEXP(obj)->str, RREGEXP(obj)->len, arg);
+            /* one trailing byte carrying the FL_USER1 flag test */
+            w_byte(FL_TEST(obj, FL_USER1), arg);
+            return;
+
+          case T_ARRAY:
+            w_uclass(obj, cArray, arg);
+            w_byte(TYPE_ARRAY, arg);
+            {
+                int len = RARRAY(obj)->len;
+                VALUE *ptr = RARRAY(obj)->ptr;
+
+                w_long(len, arg);
+                while (len--) {
+                    w_object(*ptr, arg, limit);
+                    ptr++;
+                }
+            }
+            break;
+
+          case T_HASH:
+            w_uclass(obj, cHash, arg);
+            w_byte(TYPE_HASH, arg);
+            w_long(RHASH(obj)->tbl->num_entries, arg);
+            st_foreach(RHASH(obj)->tbl, hash_each, &c_arg);
+            break;
+
+          case T_STRUCT:
+            w_byte(TYPE_STRUCT, arg);
+            {
+                int len = RSTRUCT(obj)->len;
+                char *path = rb_class2name(CLASS_OF(obj));
+                VALUE mem;
+                int i;
+
+                w_unique(path, arg);
+                w_long(len, arg);
+                mem = rb_ivar_get(CLASS_OF(obj), rb_intern("__member__"));
+                if (mem == Qnil) {
+                    Fatal("non-initialized struct");
+                }
+                /* each slot: member name as a symbol, then its value */
+                for (i=0; i<len; i++) {
+                    w_symbol(FIX2INT(RARRAY(mem)->ptr[i]), arg);
+                    w_object(RSTRUCT(obj)->ptr[i], arg, limit);
+                }
+            }
+            break;
+
+          case T_OBJECT:
+            w_byte(TYPE_OBJECT, arg);
+            {
+                VALUE class = CLASS_OF(obj);
+                char *path;
+
+                if (FL_TEST(class, FL_SINGLETON)) {
+                    TypeError("singleton can't be dumped");
+                }
+                path = rb_class2name(class);
+                w_unique(path, arg);
+                /* instance-variable count, then name/value pairs */
+                if (ROBJECT(obj)->iv_tbl) {
+                    w_long(ROBJECT(obj)->iv_tbl->num_entries, arg);
+                    st_foreach(ROBJECT(obj)->iv_tbl, obj_each, &c_arg);
+                }
+                else {
+                    w_long(0, arg);
+                }
+            }
+            break;
+
+          default:
+            TypeError("can't dump %s", rb_class2name(CLASS_OF(obj)));
+            break;
+        }
+    }
+}
+
+/*
+ * Body function for rb_ensure(): dump arg->obj using the state and depth
+ * limit stored in the call argument.  Returns Qnil; the function is
+ * declared VALUE and is called through rb_ensure(), so it must not fall
+ * off the end without a value.
+ */
+static VALUE
+dump(arg)
+    struct dump_call_arg *arg;
+{
+    w_object(arg->obj, arg->arg, arg->limit);
+    return Qnil;
+}
+
+/*
+ * Ensure function for rb_ensure(): free the symbol and object index
+ * tables of a dump.  Returns Qnil so that the VALUE-returning callback
+ * always yields a defined value.
+ */
+static VALUE
+dump_ensure(arg)
+    struct dump_arg *arg;
+{
+    st_free_table(arg->symbol);
+    st_free_table(arg->data);
+    return Qnil;
+}
+
+/*
+ * Marshal.dump(obj [, port] [, limit])
+ *
+ * With three arguments the second is the port and the third the depth
+ * limit.  With two arguments a Fixnum second argument is taken as the
+ * limit and anything else as the port.  A port must be an IO (TypeError
+ * otherwise); without one the dump is collected in a new String.
+ * Returns the port, i.e. the IO or the new String.
+ */
+static VALUE
+marshal_dump(argc, argv)
+    int argc;
+    VALUE* argv;
+{
+    VALUE obj, port, a1, a2;
+    int limit = -1;
+    extern VALUE cIO;
+    struct dump_arg arg;
+    struct dump_call_arg c_arg;
+
+    port = 0;
+    rb_scan_args(argc, argv, "12", &obj, &a1, &a2);
+    switch (argc) {
+      case 3:
+        limit = NUM2INT(a2);
+        port = a1;
+        break;
+      case 2:
+        if (FIXNUM_P(a1)) {
+            limit = FIX2INT(a1);
+        }
+        else {
+            port = a1;
+        }
+        break;
+      default:
+        break;
+    }
+
+    if (!port) {
+        /* no port given: collect the dump in a fresh string */
+        arg.fp = 0;
+        port = str_new(0, 0);
+        arg.str = port;
+    }
+    else if (!obj_is_kind_of(port, cIO)) {
+        TypeError("instance of IO needed");
+    }
+    else {
+        OpenFile *fptr;
+
+        io_binmode(port);
+        GetOpenFile(port, fptr);
+        io_writable(fptr);
+        /* use the second stream (f2) when the IO has one */
+        arg.fp = (fptr->f2) ? fptr->f2 : fptr->f;
+    }
+
+    c_arg.obj = obj;
+    c_arg.limit = limit;
+    c_arg.arg = &arg;
+    arg.symbol = st_init_numtable();
+    arg.data = st_init_numtable();
+
+    /* format version header */
+    w_byte(MARSHAL_MAJOR, &arg);
+    w_byte(MARSHAL_MINOR, &arg);
+
+    /* the tables are freed by dump_ensure() even if dumping raises */
+    rb_ensure(dump, &c_arg, dump_ensure, &arg);
+
+    return port;
+}
+
+/* State shared by all r_* functions during one Marshal.load call. */
+struct load_arg {
+ /* input stream, or 0 when reading from the ptr..end byte range */
+ FILE *fp;
+ /* next unread byte and one-past-the-end of the input string;
+    only used while `fp' is 0 */
+ UCHAR *ptr, *end;
+ /* maps symbol index -> ID, in the order symbols were read */
+ st_table *symbol;
+ /* maps object index -> object, in the order objects were registered */
+ st_table *data;
+ /* object whose `call' method r_regist() invokes with every registered
+    object, or 0 for none */
+ VALUE proc;
+};
+
+/*
+ * Read one byte from the stream or from the in-memory range.  Returns the
+ * byte as a non-negative int, or EOF when the in-memory range is
+ * exhausted (or whatever getc() reports for the stream).
+ */
+static int
+r_byte(arg)
+    struct load_arg *arg;
+{
+    if (!arg->fp) {
+        if (arg->ptr >= arg->end) return EOF;
+        return *arg->ptr++;
+    }
+    return getc(arg->fp);
+}
+
+/*
+ * Read sizeof(USHORT) bytes, least significant first, and assemble them
+ * into a USHORT (the inverse of w_short()).
+ */
+static USHORT
+r_short(arg)
+    struct load_arg *arg;
+{
+    USHORT value = 0;
+    int shift;
+
+    for (shift = 0; shift < sizeof(USHORT)*8; shift += 8) {
+        value |= r_byte(arg) << shift;
+    }
+
+    return value;
+}
+
+/*
+ * Raise TypeError for an encoded integer that needs `size' bytes, which
+ * is more than a long holds on this machine.
+ */
+static void
+long_toobig(size)
+    int size;
+{
+    /* sizeof yields a size_t; cast it so that it matches the %d conversion */
+    TypeError("long too big for this architecture (size %d, given %d)",
+              (int)sizeof(long), size);
+}
+
+/*
+ * Read an integer in the encoding produced by w_long() and return it.
+ *
+ * The first byte is a signed count: 0 means the value 0, +n means n value
+ * bytes follow for a non-negative number, -n means n value bytes follow
+ * for a negative number (the missing high bytes are all ones).  Value
+ * bytes are least significant first.  Raises TypeError through
+ * long_toobig() when the count exceeds sizeof(long).
+ *
+ * An EOF from r_byte() in the count position is not detected specially;
+ * it is treated as the count -1.
+ */
+static long
+r_long(arg)
+    struct load_arg *arg;
+{
+    int c = r_byte(arg), i;
+    register long x;
+
+    /* r_byte() returns 0..255 (or EOF), but the count was written as a
+       signed byte: map 128..255 back to -128..-1 */
+    if (c > 127) c -= 256;
+
+    if (c == 0) return 0;
+    if (c > 0) {
+        if (c > sizeof(long)) long_toobig((int)c);
+        x = 0;
+        for (i=0;i<c;i++) {
+            x |= (long)r_byte(arg) << (8*i);
+        }
+    }
+    else {
+        c = -c;
+        if (c > sizeof(long)) long_toobig((int)c);
+        x = -1;
+        for (i=0;i<c;i++) {
+            /* build the mask at full long width; an int-wide 0xff
+               overflows for shifts of 24 bits and more */
+            x &= ~(long)((unsigned long)0xff << (8*i));
+            x |= (long)r_byte(arg) << (8*i);
+        }
+    }
+    return x;
+}
+
+/*
+ * Read the length prefix of a byte sequence.  A negative length can only
+ * come from a malformed dump and is rejected with ArgError before it is
+ * used as an allocation or copy size.
+ */
+static long
+r_bytes_len(arg)
+    struct load_arg *arg;
+{
+    long len = r_long(arg);
+
+    if (len < 0) {
+        ArgError("dump format error (negative length)");
+    }
+    return len;
+}
+
+/*
+ * r_bytes(s, arg): read a length-prefixed byte sequence into a buffer
+ * allocated with ALLOCA_N in the CALLER's frame, point s (a char*) at it
+ * and evaluate to the number of bytes actually read.
+ *
+ * s temporarily holds the length, cast to a pointer, between the two
+ * halves of the comma expression.  The buffer is one byte larger than the
+ * length because r_bytes0() stores a terminating '\0' at s[len].
+ */
+#define r_bytes(s, arg) \
+    (s = (char*)r_bytes_len(arg), r_bytes0(&s,ALLOCA_N(char,(long)s+1),(long)s,arg))
+
+/*
+ * Fill s with up to len bytes from the input, NUL-terminate it and store
+ * s in *sp.  Returns the number of bytes read, which is smaller than len
+ * when the input ends early.  s must have room for len+1 bytes.
+ */
+static int
+r_bytes0(sp, s, len, arg)
+    char **sp, *s;
+    int len;
+    struct load_arg *arg;
+{
+    if (arg->fp) {
+        len = fread(s, 1, len, arg->fp);
+    }
+    else {
+        /* clamp to what is left of the in-memory input */
+        if (arg->ptr + len > arg->end) {
+            len = arg->end - arg->ptr;
+        }
+        memcpy(s, arg->ptr, len);
+        arg->ptr += len;
+    }
+
+    (s)[len] = '\0';
+    *sp = s;
+
+    return len;
+}
+
+/*
+ * Read a symbol and return its ID.
+ *
+ * A TYPE_SYMLINK tag is followed by the index of a symbol read earlier;
+ * TypeError is raised when that index is unknown.  Any other tag byte is
+ * taken to introduce a symbol name, which is interned and entered in
+ * arg->symbol under the next free index.
+ */
+static ID
+r_symbol(arg)
+    struct load_arg *arg;
+{
+    char *name;
+    ID id;
+    char type;   /* unused */
+    int tag = r_byte(arg);
+
+    if (tag == TYPE_SYMLINK) {
+        int index = r_long(arg);
+
+        if (!st_lookup(arg->symbol, index, &id)) {
+            TypeError("bad symbol");
+        }
+        else {
+            return id;
+        }
+    }
+
+    r_bytes(name, arg);
+    id = rb_intern(name);
+    st_insert(arg->symbol, arg->symbol->num_entries, id);
+
+    return id;
+}
+
+/*
+ * Read a symbol and return its name as a C string (the inverse of
+ * w_unique()).
+ */
+static char*
+r_unique(arg)
+    struct load_arg *arg;
+{
+    ID id = r_symbol(arg);
+
+    return rb_id2name(id);
+}
+
+/*
+ * Read a length-prefixed byte sequence and return it as a new String,
+ * passed through str_taint().
+ */
+static VALUE
+r_string(arg)
+    struct load_arg *arg;
+{
+    char *bytes;
+    int n;
+    VALUE str;
+
+    n = r_bytes(bytes, arg);
+    str = str_new(bytes, n);
+    return str_taint(str);
+}
+
+/*
+ * Record a freshly loaded object: call the user's proc with it, if one
+ * was given, then enter it in arg->data under the next free index.
+ * Returns v.
+ */
+static VALUE
+r_regist(v, arg)
+    VALUE v;
+    struct load_arg *arg;
+{
+    VALUE hook = arg->proc;
+
+    if (hook) {
+        rb_funcall(hook, rb_intern("call"), 1, v);
+    }
+    st_insert(arg->data, arg->data->num_entries, v);
+    return v;
+}
+
+/*
+ * Read one value from the input and return it.
+ *
+ * The first byte is a type tag (the TYPE_* constants) and selects how the
+ * rest is decoded; container types call r_object() recursively for their
+ * contents.  Newly created objects are passed to r_regist() so that a
+ * later TYPE_LINK can refer to them by index.  Malformed input is reported
+ * with ArgError or TypeError.
+ *
+ * NOTE(review): after the ArgError/TypeError calls control reaches the end
+ * of the function without a return statement; presumably those functions
+ * raise and never return -- confirm.
+ */
+static VALUE
+r_object(arg)
+ struct load_arg *arg;
+{
+ VALUE v;
+ int type = r_byte(arg);
+
+ switch (type) {
+ case EOF:
+ eof_error();
+ return Qnil;
+
+ /* back-reference: index of an object registered earlier */
+ case TYPE_LINK:
+ if (st_lookup(arg->data, r_long(arg), &v)) {
+ return v;
+ }
+ ArgError("dump format error (unlinked)");
+ break;
+
+ /* class name followed by the object itself; the object's class field is
+    overwritten with the named class */
+ case TYPE_UCLASS:
+ {
+ VALUE c = rb_path2class(r_unique(arg));
+ v = r_object(arg);
+ if (rb_special_const_p(v)) {
+ ArgError("dump format error (user class)");
+ }
+ RBASIC(v)->class = c;
+ return v;
+ }
+
+ /* the next four cases return without registering the value */
+ case TYPE_NIL:
+ return Qnil;
+
+ case TYPE_TRUE:
+ return TRUE;
+
+ case TYPE_FALSE:
+ return FALSE;
+
+ case TYPE_FIXNUM:
+ {
+ int i = r_long(arg);
+ return INT2FIX(i);
+ }
+
+ /* length-prefixed text, converted with atof() */
+ case TYPE_FLOAT:
+ {
+#ifndef atof
+ double atof();
+#endif
+ char *buf;
+
+ r_bytes(buf, arg);
+ v = float_new(atof(buf));
+ return r_regist(v, arg);
+ }
+
+ /* sign byte ('+' means the sign field is set), digit count, then that
+    many digits read with r_short() */
+ case TYPE_BIGNUM:
+ {
+ int len;
+ USHORT *digits;
+
+ NEWOBJ(big, struct RBignum);
+ OBJSETUP(big, cBignum, T_BIGNUM);
+ big->sign = (r_byte(arg) == '+');
+ big->len = len = r_long(arg);
+ big->digits = digits = ALLOC_N(USHORT, len);
+ while (len--) {
+ *digits++ = r_short(arg);
+ }
+ big = RBIGNUM(big_norm((VALUE)big));
+ /* registered only if big_norm() still returned a T_BIGNUM */
+ if (TYPE(big) == T_BIGNUM) {
+ r_regist(big, arg);
+ }
+ return (VALUE)big;
+ }
+
+ case TYPE_STRING:
+ return r_regist(r_string(arg), arg);
+
+ /* length-prefixed source followed by one flag byte, both handed to
+    reg_new() */
+ case TYPE_REGEXP:
+ {
+ char *buf;
+ int len = r_bytes(buf, arg);
+ int ci = r_byte(arg);
+ return r_regist(reg_new(buf, len, ci), arg);
+ }
+
+ /* element count followed by the elements; the array is registered before
+    its elements are read, matching the order in which w_object() numbers
+    objects */
+ case TYPE_ARRAY:
+ {
+ volatile int len = r_long(arg);
+ v = ary_new2(len);
+ r_regist(v, arg);
+ while (len--) {
+ ary_push(v, r_object(arg));
+ }
+ return v;
+ }
+
+ /* pair count followed by key/value pairs */
+ case TYPE_HASH:
+ {
+ int len = r_long(arg);
+
+ v = hash_new();
+ r_regist(v, arg);
+ while (len--) {
+ VALUE key = r_object(arg);
+ VALUE value = r_object(arg);
+ hash_aset(v, key, value);
+ }
+ return v;
+ }
+
+ /* class name, member count, then (member name symbol, value) pairs */
+ case TYPE_STRUCT:
+ {
+ VALUE class, mem, values;
+ volatile int i; /* gcc 2.7.2.3 -O2 bug?? */
+ int len;
+ ID slot;
+
+ class = rb_path2class(r_unique(arg));
+ mem = rb_ivar_get(class, rb_intern("__member__"));
+ if (mem == Qnil) {
+ Fatal("non-initialized struct");
+ }
+ len = r_long(arg);
+
+ /* build the struct with len nil slots, then fill them in below */
+ values = ary_new2(len);
+ for (i=0; i<len; i++) {
+ ary_push(values, Qnil);
+ }
+ v = struct_alloc(class, values);
+ r_regist(v, arg);
+ for (i=0; i<len; i++) {
+ slot = r_symbol(arg);
+
+ /* each dumped member name must equal the i-th entry of the class's
+    __member__ list */
+ if (RARRAY(mem)->ptr[i] != INT2FIX(slot)) {
+ TypeError("struct %s not compatible (:%s for :%s)",
+ rb_class2name(class),
+ rb_id2name(slot),
+ rb_id2name(FIX2INT(RARRAY(mem)->ptr[i])));
+ }
+ struct_aset(v, INT2FIX(i), r_object(arg));
+ }
+ return v;
+ }
+ break;
+
+ /* class name followed by a string that is passed to the class's
+    `_load_from' method; the local `len' is not used */
+ case TYPE_USERDEF:
+ {
+ VALUE class;
+ int len;
+
+ class = rb_path2class(r_unique(arg));
+ if (rb_respond_to(class, s_load)) {
+ v = rb_funcall(class, s_load, 1, r_string(arg));
+ return r_regist(v, arg);
+ }
+ TypeError("class %s needs to have method `_load_from'",
+ rb_class2name(class));
+ }
+ break;
+
+ /* class name, instance-variable count, then (name symbol, value) pairs */
+ case TYPE_OBJECT:
+ {
+ VALUE class;
+ int len;
+
+ class = rb_path2class(r_unique(arg));
+ len = r_long(arg);
+ v = obj_alloc(class);
+ r_regist(v, arg);
+ if (len > 0) {
+ while (len--) {
+ ID id = r_symbol(arg);
+ VALUE val = r_object(arg);
+ rb_ivar_set(v, id, val);
+ }
+ }
+ return v;
+ }
+ break;
+
+ /* length-prefixed class path, resolved with rb_path2class(); the result
+    is not registered */
+ case TYPE_MODULE:
+ {
+ char *buf;
+ r_bytes(buf, arg);
+ return rb_path2class(buf);
+ }
+
+ default:
+ ArgError("dump format error(0x%x)", type);
+ break;
+ }
+}
+
+/*
+ * Body function for rb_ensure(): read the top-level object and return it.
+ */
+static VALUE
+load(arg)
+    struct load_arg *arg;
+{
+    VALUE result = r_object(arg);
+
+    return result;
+}
+
+/*
+ * Ensure function for rb_ensure(): free the symbol and object index
+ * tables of a load.  Returns Qnil so that the VALUE-returning callback
+ * always yields a defined value.
+ */
+static VALUE
+load_ensure(arg)
+    struct load_arg *arg;
+{
+    st_free_table(arg->symbol);
+    st_free_table(arg->data);
+    return Qnil;
+}
+
+/*
+ * Marshal.load(port [, proc])
+ *
+ * port is either a String holding a dump or an IO to read one from;
+ * anything else raises TypeError.  The first byte must equal
+ * MARSHAL_MAJOR (TypeError otherwise); a second byte different from
+ * MARSHAL_MINOR only produces a warning.  When proc is given, its `call'
+ * method is invoked for every object registered while loading.  Returns
+ * the loaded object.
+ */
+static VALUE
+marshal_load(argc, argv)
+    int argc;
+    VALUE *argv;
+{
+    VALUE port, proc;
+    FILE *fp;
+    int major;
+    VALUE v;
+    OpenFile *fptr;
+    struct load_arg arg;
+
+    rb_scan_args(argc, argv, "11", &port, &proc);
+    if (TYPE(port) != T_STRING) {
+        if (!obj_is_kind_of(port, cIO)) {
+            TypeError("instance of IO needed");
+        }
+        else {
+            io_binmode(port);
+            GetOpenFile(port, fptr);
+            io_readable(fptr);
+            arg.fp = fptr->f;
+        }
+    }
+    else {
+        /* read straight out of the string's buffer */
+        arg.fp = 0;
+        arg.ptr = RSTRING(port)->ptr;
+        arg.end = arg.ptr + RSTRING(port)->len;
+    }
+
+    major = r_byte(&arg);
+    if (major != MARSHAL_MAJOR) {
+        TypeError("Old marshal file format (can't read)");
+    }
+    else {
+        if (r_byte(&arg) != MARSHAL_MINOR) {
+            Warning("Old marshal file format (can be read)");
+        }
+        arg.symbol = st_init_numtable();
+        arg.data = st_init_numtable();
+        arg.proc = NIL_P(proc) ? 0 : proc;
+        /* the tables are freed by load_ensure() even if loading raises */
+        v = rb_ensure(load, &arg, load_ensure, &arg);
+    }
+
+    return v;
+}
+
+/*
+ * Define the Marshal module with its functions dump, load and restore
+ * (restore is a second name for load), and intern the names of the
+ * `_dump_to' / `_load_from' hook methods.
+ */
+Init_marshal()
+{
+    VALUE mMarshal = rb_define_module("Marshal");
+
+    s_dump = rb_intern("_dump_to");
+    s_load = rb_intern("_load_from");
+    rb_define_module_function(mMarshal, "dump", marshal_dump, -1);
+    rb_define_module_function(mMarshal, "load", marshal_load, -1);
+    /* marshal_load takes (argc, argv), so it must be registered with -1
+       exactly like "load"; an arity of 1 would call it with the wrong
+       argument layout */
+    rb_define_module_function(mMarshal, "restore", marshal_load, -1);
+
+    rb_provide("marshal.o"); /* for backward compatibility */
+}