diff options
-rw-r--r-- | ChangeLog | 60 | ||||
-rw-r--r-- | NEWS | 8 | ||||
-rw-r--r-- | compile.c | 1431 | ||||
-rw-r--r-- | encoding.c | 6 | ||||
-rw-r--r-- | insns.def | 6 | ||||
-rw-r--r-- | internal.h | 1 | ||||
-rw-r--r-- | iseq.c | 151 | ||||
-rw-r--r-- | iseq.h | 33 | ||||
-rw-r--r-- | load.c | 16 | ||||
-rw-r--r-- | proc.c | 13 | ||||
-rw-r--r-- | sample/iseq_loader.rb | 240 | ||||
-rw-r--r-- | test/lib/iseq_loader_checker.rb | 47 | ||||
-rw-r--r-- | test/runner.rb | 2 | ||||
-rw-r--r-- | vm.c | 3 | ||||
-rw-r--r-- | vm_core.h | 46 | ||||
-rw-r--r-- | vm_insnhelper.c | 7 |
16 files changed, 1980 insertions, 90 deletions
@@ -1,3 +1,63 @@ +Tue Dec 8 22:31:58 2015 Koichi Sasada <ko1@atdot.net> + + * introduce new ISeq binary format serializer/de-serializer + and a pre-compilation/runtime loader sample. + [Feature #11788] + + * iseq.c: add new methods: + * RubyVM::InstructionSequence#to_binary_format(extra_data = nil) + * RubyVM::InstructionSequence.from_binary_format(binary) + * RubyVM::InstructionSequence.from_binary_format_extra_data(binary) + + * compile.c: implement body of this new feature. + + * load.c (rb_load_internal0), iseq.c (rb_iseq_load_iseq): + call RubyVM::InstructionSequence.load_iseq(fname) with + loading script name if this method is defined. + + We can return any ISeq object as a result value. + Otherwise loading will continue as usual. + + This interface is not mature and is not extensible, + so we don't guarantee the future compatibility of this method. + Basically, you shouldn't use this method. + + * iseq.h: move ISEQ_MAJOR/MINOR_VERSION (and some definitions) + from iseq.c. + + * encoding.c (rb_data_is_encoding), internal.h: added. + + * vm_core.h: add several supports for lazy load. + * add USE_LAZY_LOAD macro to enable or disable + this feature. + * add several fields to rb_iseq_t. + * introduce new macro rb_iseq_check(). + + * insns.def: some checks for lazy loading feature. + + * vm_insnhelper.c: ditto. + + * proc.c: ditto. + + * vm.c: ditto. + + * test/lib/iseq_loader_checker.rb: enabled iff suitable + environment variables are provided. + + * test/runner.rb: enable lib/iseq_loader_checker.rb. + + * sample/iseq_loader.rb: add sample compiler and loader. + + $ ruby sample/iseq_loader.rb [dir] + + will compile all ruby scripts in [dir]. + With default setting, this compile creates *.rb.yarb files + in same directory of target .rb scripts. + + $ ruby -r sample/iseq_loader.rb [app] + + will run with loading of compiled binary data enabled. 
+ Tue Dec 8 21:21:16 2015 Kazuhiro NISHIYAMA <zn@mbf.nifty.com> * NEWS: mention about Enumerator::Lazy#grep_v. @@ -116,6 +116,14 @@ with all sufficient information, see the ChangeLog file. * Regexp/String: Updated Unicode version from 7.0.0 to 8.0.0 +* RubyVM::InstructionSequence + * add the following methods as a primitive tool of iseq loader. + See sample/iseq_loader.rb for usage. + [Feature #11788] + * RubyVM::InstructionSequence#to_binary_format(extra_data = nil) + * RubyVM::InstructionSequence.from_binary_format(binary) + * RubyVM::InstructionSequence.from_binary_format_extra_data(binary) + * String * String#+@ and String#- are added to get mutable/frozen strings. @@ -10,6 +10,8 @@ **********************************************************************/ #include "internal.h" +#include "ruby/re.h" +#include "encindex.h" #include <math.h> #define USE_INSN_STACK_INCREASE 1 @@ -17,6 +19,7 @@ #include "iseq.h" #include "insns.inc" #include "insns_info.inc" +#include "gc.h" #ifdef HAVE_DLADDR # include <dlfcn.h> @@ -6785,3 +6788,1431 @@ rb_method_for_self_aset(VALUE name, VALUE arg, rb_insn_func_t func) { return method_for_self(name, arg, func, for_self_aset); } + +/* ISeq binary format */ + /* Positions inside the dumped binary are unsigned ints; IBF_OFFSET() recovers one from a pointer-typed field by casting through VALUE. */ +typedef unsigned int ibf_offset_t; +#define IBF_OFFSET(ptr) ((ibf_offset_t)(VALUE)(ptr)) + /* Header record: magic, format version, sizes, and the entry count and offset of the iseq, id and object lists. */ +struct ibf_header { + char magic[4]; /* YARB */ + unsigned int major_version; + unsigned int minor_version; + unsigned int size; + unsigned int extra_size; + + unsigned int iseq_list_size; + unsigned int id_list_size; + unsigned int object_list_size; + + ibf_offset_t iseq_list_offset; + ibf_offset_t id_list_offset; + ibf_offset_t object_list_offset; +}; + /* NOTE(review): ibf_id_entry is not referenced in the visible part of this patch. */ +struct ibf_id_entry { + enum { + ibf_id_enc_ascii, + ibf_id_enc_utf8, + ibf_id_enc_other + } enc : 2; + char body[1]; +}; + /* State carried while dumping; str is the output buffer that ibf_dump_write() appends to. */ +struct ibf_dump { + VALUE str; + VALUE iseq_list; /* [iseq0 offset, ...] 
*/ + VALUE obj_list; /* [objs] */ + st_table *iseq_table; /* iseq -> iseq number */ + st_table *id_table; /* id -> id number */ +}; + +rb_iseq_t * iseq_alloc(void); + +/* State carried while loading: the raw buffer, its header, and the id/iseq/object lists. */ +struct ibf_load { + const char *buff; + const struct ibf_header *header; + ID *id_list; /* [id0, ...] */ + VALUE iseq_list; /* [iseq0, ...] */ + VALUE obj_list; /* [obj0, ...] */ + VALUE loader_obj; + VALUE str; + rb_iseq_t *iseq; +}; + +/* + * Current write position: the byte length of the dump buffer. + * RSTRING_LEN() is used because offsets are byte offsets (rb_str_strlen() + * counts characters). A position that does not fit in ibf_offset_t raises + * RuntimeError instead of being silently truncated. + */ +static ibf_offset_t +ibf_dump_pos(struct ibf_dump *dump) +{ + const long pos = RSTRING_LEN(dump->str); + + if (pos < 0 || (long)(ibf_offset_t)pos != pos) { + rb_raise(rb_eRuntimeError, "ibf_dump_pos: dump size exceeds the range of ibf_offset_t"); + } + return (ibf_offset_t)pos; +} + +/* Appends size bytes from buff to the dump buffer and returns the offset at which they start. */ +static ibf_offset_t +ibf_dump_write(struct ibf_dump *dump, const void *buff, unsigned long size) +{ + ibf_offset_t pos = ibf_dump_pos(dump); /* range-checked start offset */ + rb_str_cat(dump->str, (const char *)buff, size); + return pos; +} + +/* Overwrites size bytes at offset inside the already written buffer; rb_bug() if the range runs past the end. */ +static void +ibf_dump_overwrite(struct ibf_dump *dump, void *buff, unsigned int size, long offset) +{ + VALUE str = dump->str; + char *ptr = RSTRING_PTR(str); + if (size + offset > RSTRING_LEN(str)) rb_bug("ibf_dump_overwrite: overflow"); + memcpy(ptr + offset, buff, size); +} + +/* Returns a ruby_xmalloc()ed copy of size bytes found at offset in the load buffer. */ +static void * +ibf_load_alloc(const struct ibf_load *load, ibf_offset_t offset, int size) +{ + void *buff = ruby_xmalloc(size); + memcpy(buff, load->buff + offset, size); + return buff; +} + +/* + * Shorthands that expect a variable named dump (IBF_W*) or load (IBF_R) in scope. + * IBF_W writes n elements and returns the offset disguised as a type pointer; + * IBF_WV writes one variable; IBF_WP writes n elements and returns the plain offset; + * IBF_R copies n elements out of the load buffer at the offset stored in val. + */ +#define IBF_W(b, type, n) (type *)(VALUE)ibf_dump_write(dump, (b), sizeof(type) * (n)) +#define IBF_WV(variable) ibf_dump_write(dump, &(variable), sizeof(variable)) +#define IBF_WP(b, type, n) ibf_dump_write(dump, (b), sizeof(type) * (n)) +#define IBF_R(val, type, n) (type *)ibf_load_alloc(load, IBF_OFFSET(val), sizeof(type) * (n)) + +/* Returns the int stored for key in table, or -1 when key is absent. */ +static int +ibf_table_lookup(struct st_table *table, st_data_t key) +{ + st_data_t val; + + if (st_lookup(table, key, &val)) { + return (int)val; + } + else { + return -1; + } +} + +/* Returns the index of key in table; a new key is inserted with the current entry count as its index. */ +static int +ibf_table_index(struct st_table *table, st_data_t key) +{ + int index = ibf_table_lookup(table, key); + + if (index < 0) { /* not found */ + index = (int)table->num_entries; + st_insert(table, key, (st_data_t)index); + } + + return 
index; +} + +/* dump/load generic */ + +static VALUE ibf_load_object(const struct ibf_load *load, VALUE object_index); +static rb_iseq_t *ibf_load_iseq(const struct ibf_load *load, const rb_iseq_t *index_iseq); + /* Registers obj in dump->obj_list and returns its index; an object already present (compared with ==) keeps its earlier index. The lookup is a linear scan of the list. */ +static VALUE +ibf_dump_object(struct ibf_dump *dump, VALUE obj) +{ + long index = RARRAY_LEN(dump->obj_list); + long i; + for (i=0; i<index; i++) { + if (RARRAY_AREF(dump->obj_list, i) == obj) return (VALUE)i; /* dedup */ + } + rb_ary_push(dump->obj_list, obj); + return (VALUE)index; +} + /* Returns the index of id in dump->id_table, assigning a new index on first use. */ +static VALUE +ibf_dump_id(struct ibf_dump *dump, ID id) +{ + return (VALUE)ibf_table_index(dump->id_table, (st_data_t)id); +} + /* Resolves a dumped id index to an ID. Index 0 yields ID 0. Otherwise load->id_list acts as a cache: an empty slot is filled by loading the object whose index is stored in the dumped id list and interning it (a nil object yields ID 0). */ +static ID +ibf_load_id(const struct ibf_load *load, const ID id_index) +{ + ID id; + + if (id_index == 0) { + id = 0; + } + else { + id = load->id_list[(long)id_index]; + + if (id == 0) { + long *indices = (long *)(load->buff + load->header->id_list_offset); + VALUE str = ibf_load_object(load, indices[id_index]); + id = NIL_P(str) ? 0 : rb_intern_str(str); /* str == nil -> internal junk id */ + load->id_list[(long)id_index] = id; + } + } + + return id; +} + +/* dump/load: code */ + /* A call-info operand is dumped as a flag only: Qtrue when VM_CALL_KWARG is set, Qfalse otherwise. */ +static VALUE +ibf_dump_callinfo(struct ibf_dump *dump, const struct rb_call_info *ci) +{ + return (ci->flag & VM_CALL_KWARG) ? 
Qtrue : Qfalse; +} + +static ibf_offset_t ibf_dump_iseq_each(struct ibf_dump *dump, const rb_iseq_t *iseq); + /* Returns the iseq's index in dump->iseq_table disguised as a pointer; NULL is encoded as (rb_iseq_t *)-1. An iseq seen for the first time is dumped with ibf_dump_iseq_each() and its offset stored in dump->iseq_list. */ +static rb_iseq_t * +ibf_dump_iseq(struct ibf_dump *dump, const rb_iseq_t *iseq) +{ + if (iseq == NULL) { + return (rb_iseq_t *)-1; + } + else { + int iseq_index = ibf_table_lookup(dump->iseq_table, (st_data_t)iseq); + if (iseq_index < 0) { + iseq_index = ibf_table_index(dump->iseq_table, (st_data_t)iseq); + rb_ary_store(dump->iseq_list, iseq_index, LONG2NUM(ibf_dump_iseq_each(dump, rb_iseq_check(iseq)))); + } + return (rb_iseq_t *)(VALUE)iseq_index; + } +} + /* A global entry is dumped as the id index of entry->id. */ +static VALUE +ibf_dump_gentry(struct ibf_dump *dump, const struct rb_global_entry *entry) +{ + return (VALUE)ibf_dump_id(dump, entry->id); +} + /* Inverse of ibf_dump_gentry: entry carries an id index, which is resolved and passed to rb_global_entry(). */ +static VALUE +ibf_load_gentry(const struct ibf_load *load, const struct rb_global_entry *entry) +{ + ID gid = ibf_load_id(load, (ID)(VALUE)entry); + return (VALUE)rb_global_entry(gid); +} + /* Copies the instruction array, replacing operands that hold objects, iseqs, inline caches, call info, call caches, IDs or global entries by dumpable values, then writes the copy and returns its offset disguised as a pointer. */ +static VALUE * +ibf_dump_code(struct ibf_dump *dump, const rb_iseq_t *iseq) +{ + const int iseq_size = iseq->body->iseq_size; + int code_index; + VALUE *code; + const VALUE *orig_code = rb_iseq_original_iseq(iseq); + + code = ALLOCA_N(VALUE, iseq_size); + + for (code_index=0; code_index<iseq_size;) { + const VALUE insn = orig_code[code_index]; + const char *types = insn_op_types(insn); + int op_index; + + code[code_index++] = (VALUE)insn; + + for (op_index=0; types[op_index]; op_index++, code_index++) { + VALUE op = orig_code[code_index]; + switch (types[op_index]) { + case TS_CDHASH: + case TS_VALUE: + code[code_index] = ibf_dump_object(dump, op); + break; + case TS_ISEQ: + code[code_index] = (VALUE)ibf_dump_iseq(dump, (const rb_iseq_t *)op); + break; + case TS_IC: /* stored as the position of the entry within is_entries; NOTE(review): when no entry matches, i == is_size is stored unchecked */ + { + unsigned int i; + for (i=0; i<iseq->body->is_size; i++) { + if (op == (VALUE)&iseq->body->is_entries[i]) { + break; + } + } + code[code_index] = i; + } + break; + case TS_CALLINFO: + code[code_index] = ibf_dump_callinfo(dump, (const struct rb_call_info *)op); + break; + case TS_CALLCACHE: + 
code[code_index] = 0; + break; + case TS_ID: + code[code_index] = ibf_dump_id(dump, (ID)op); + break; + case TS_GENTRY: + code[code_index] = ibf_dump_gentry(dump, (const struct rb_global_entry *)op); + break; + case TS_FUNCPTR: + rb_raise(rb_eRuntimeError, "TS_FUNCPTR is not supported"); + break; + default: + code[code_index] = op; + break; + } + } + assert(insn_len(insn) == op_index+1); + } + + return IBF_W(code, VALUE, iseq_size); +} + /* Inverse of ibf_dump_code: copies the dumped code array out of the load buffer and rewrites each operand in place. Call-info and call-cache operands are handed out sequentially from the entry arrays already installed in iseq->body, so those arrays must be set up before this is called. */ +static VALUE * +ibf_load_code(const struct ibf_load *load, const rb_iseq_t *iseq, const struct rb_iseq_constant_body *body) +{ + const int iseq_size = body->iseq_size; + int code_index; + VALUE *code = IBF_R(body->iseq_encoded, VALUE, iseq_size); + + struct rb_call_info *ci_entries = iseq->body->ci_entries; + struct rb_call_info_with_kwarg *ci_kw_entries = (struct rb_call_info_with_kwarg *)&iseq->body->ci_entries[iseq->body->ci_size]; /* keyword call infos start right after the ci_size plain ones */ + struct rb_call_cache *cc_entries = iseq->body->cc_entries; + union iseq_inline_storage_entry *is_entries = iseq->body->is_entries; + + for (code_index=0; code_index<iseq_size;) { + const VALUE insn = code[code_index++]; + const char *types = insn_op_types(insn); + int op_index; + + for (op_index=0; types[op_index]; op_index++, code_index++) { + VALUE op = code[code_index]; + + switch (types[op_index]) { + case TS_CDHASH: + case TS_VALUE: + code[code_index] = ibf_load_object(load, op); + break; + case TS_ISEQ: + code[code_index] = (VALUE)ibf_load_iseq(load, (const rb_iseq_t *)op); + break; + case TS_IC: + code[code_index] = (VALUE)&is_entries[(int)op]; + break; + case TS_CALLINFO: + code[code_index] = op ? 
(VALUE)ci_kw_entries++ : (VALUE)ci_entries++; /* op is Qtrue (kw) or Qfalse (!kw) */ + break; + case TS_CALLCACHE: + code[code_index] = (VALUE)cc_entries++; + break; + case TS_ID: + code[code_index] = ibf_load_id(load, (ID)op); + break; + case TS_GENTRY: + code[code_index] = ibf_load_gentry(load, (const struct rb_global_entry *)op); + break; + case TS_FUNCPTR: + rb_raise(rb_eRuntimeError, "TS_FUNCPTR is not supported"); + break; + default: + /* code[code_index] = op; */ + break; + } + } + assert(insn_len(insn) == op_index+1); + }; + + + return code; +} + /* Writes the optional-parameter table (opt_num + 1 entries) and returns its offset disguised as a pointer, or NULL when there are no optional parameters. */ +static VALUE * +ibf_dump_param_opt_table(struct ibf_dump *dump, const rb_iseq_t *iseq) +{ + int opt_num = iseq->body->param.opt_num; + + if (opt_num > 0) { + return IBF_W(iseq->body->param.opt_table, VALUE, opt_num + 1); + } + else { + return NULL; + } +} + /* Returns a freshly allocated copy of the dumped optional-parameter table, or NULL when opt_num is not positive. */ +static VALUE * +ibf_load_param_opt_table(const struct ibf_load *load, const struct rb_iseq_constant_body *body) +{ + int opt_num = body->param.opt_num; + + if (opt_num > 0) { + ibf_offset_t offset = IBF_OFFSET(body->param.opt_table); + VALUE *table = ALLOC_N(VALUE, opt_num+1); + MEMCPY(table, load->buff + offset, VALUE, opt_num+1); + return table; + } + else { + return NULL; + } +} + /* Dumps the keyword-parameter description: the ID table becomes id indices, the default values become object indices, and the struct itself is written last with both pointers replaced by offsets. Returns NULL when the iseq has no keyword parameters. */ +static struct rb_iseq_param_keyword * +ibf_dump_param_keyword(struct ibf_dump *dump, const rb_iseq_t *iseq) +{ + const struct rb_iseq_param_keyword *kw = iseq->body->param.keyword; + + if (kw) { + struct rb_iseq_param_keyword dump_kw = *kw; + int dv_num = kw->num - kw->required_num; + ID *ids = kw->num > 0 ? ALLOCA_N(ID, kw->num) : NULL; + VALUE *dvs = dv_num > 0 ? 
ALLOCA_N(VALUE, dv_num) : NULL; + int i; + + for (i=0; i<kw->num; i++) ids[i] = (ID)ibf_dump_id(dump, kw->table[i]); + for (i=0; i<dv_num; i++) dvs[i] = (VALUE)ibf_dump_object(dump, kw->default_values[i]); + + dump_kw.table = IBF_W(ids, ID, kw->num); + dump_kw.default_values = IBF_W(dvs, VALUE, dv_num); + return IBF_W(&dump_kw, struct rb_iseq_param_keyword, 1); + } + else { + return NULL; + } +} + /* Inverse of ibf_dump_param_keyword: copies the struct and its two tables out of the load buffer, then resolves id indices to IDs and object indices to objects. Returns NULL when no keyword description was dumped. */ +static const struct rb_iseq_param_keyword * +ibf_load_param_keyword(const struct ibf_load *load, const struct rb_iseq_constant_body *body) +{ + if (body->param.keyword) { + struct rb_iseq_param_keyword *kw = IBF_R(body->param.keyword, struct rb_iseq_param_keyword, 1); + ID *ids = IBF_R(kw->table, ID, kw->num); + int dv_num = kw->num - kw->required_num; + VALUE *dvs = IBF_R(kw->default_values, VALUE, dv_num); + int i; + + for (i=0; i<kw->num; i++) { + ids[i] = ibf_load_id(load, ids[i]); + } + for (i=0; i<dv_num; i++) { + dvs[i] = ibf_load_object(load, dvs[i]); + } + + kw->table = ids; + kw->default_values = dvs; + return kw; + } + else { + return NULL; + } +} + /* The line info table is written verbatim (line_info_size entries). */ +static struct iseq_line_info_entry * +ibf_dump_line_info_table(struct ibf_dump *dump, const rb_iseq_t *iseq) +{ + return IBF_W(iseq->body->line_info_table, struct iseq_line_info_entry, iseq->body->line_info_size); +} + /* Returns an allocated copy of the dumped line info table. */ +static struct iseq_line_info_entry * +ibf_load_line_info_table(const struct ibf_load *load, const struct rb_iseq_constant_body *body) +{ + return IBF_R(body->line_info_table, struct iseq_line_info_entry, body->line_info_size); +} + /* Writes the local variable table as id indices; the table is treated as having local_size - 1 entries. */ +static ID * +ibf_dump_local_table(struct ibf_dump *dump, const rb_iseq_t *iseq) +{ + const int size = iseq->body->local_size - 1; + ID *table = ALLOCA_N(ID, size); + int i; + + for (i=0; i<size; i++) { + table[i] = ibf_dump_id(dump, iseq->body->local_table[i]); + } + + return IBF_W(table, ID, size); +} + /* Returns an allocated local table with id indices resolved to IDs, or NULL when the table is empty. */ +static ID * +ibf_load_local_table(const struct ibf_load *load, const struct rb_iseq_constant_body *body) +{ + const int size = body->local_size - 1; + + if (size > 0) 
{ + ID *table = IBF_R(body->local_table, ID, size); + int i; + + for (i=0; i<size; i++) { + table[i] = ibf_load_id(load, table[i]); + } + return table; + } + else { + return NULL; + } +} + /* Writes a copy of the catch table in which each entry's iseq pointer is replaced by the value ibf_dump_iseq() returns for it. Returns the offset disguised as a pointer, or NULL when there is no catch table. */ +static struct iseq_catch_table * +ibf_dump_catch_table(struct ibf_dump *dump, const rb_iseq_t *iseq) +{ + const struct iseq_catch_table *table = iseq->body->catch_table; + + if (table) { + int byte_size = iseq_catch_table_bytes(iseq->body->catch_table->size); + struct iseq_catch_table *dump_table = (struct iseq_catch_table *)ALLOCA_N(char, byte_size); + unsigned int i; + dump_table->size = table->size; + for (i=0; i<table->size; i++) { + dump_table->entries[i] = table->entries[i]; + dump_table->entries[i].iseq = ibf_dump_iseq(dump, table->entries[i].iseq); + } + return (struct iseq_catch_table *)(VALUE)ibf_dump_write(dump, dump_table, byte_size); + } + else { + return NULL; + } +} + /* Inverse of ibf_dump_catch_table: reads the entry count from the start of the dumped table, copies the whole table, and resolves every entry's iseq with ibf_load_iseq(). */ +static struct iseq_catch_table * +ibf_load_catch_table(const struct ibf_load *load, const struct rb_iseq_constant_body *body) +{ + if (body->catch_table) { + struct iseq_catch_table *table; + unsigned int i; + unsigned int size; + size = *(unsigned int *)(load->buff + IBF_OFFSET(body->catch_table)); + table = ibf_load_alloc(load, IBF_OFFSET(body->catch_table), iseq_catch_table_bytes(size)); + for (i=0; i<size; i++) { + table->entries[i].iseq = ibf_load_iseq(load, table->entries[i].iseq); + } + return table; + } + else { + return NULL; + } +} + /* Dumps the call-info area: ci_size plain entries immediately followed by ci_kw_size keyword entries, copied in one piece and then patched. */ +static struct rb_call_info * +ibf_dump_ci_entries(struct ibf_dump *dump, const rb_iseq_t *iseq) +{ + const unsigned int ci_size = iseq->body->ci_size; + const unsigned int ci_kw_size = iseq->body->ci_kw_size; + const struct rb_call_info *ci_entries = iseq->body->ci_entries; + struct rb_call_info *dump_ci_entries; + struct rb_call_info_with_kwarg *dump_ci_kw_entries; + int byte_size = ci_size * sizeof(struct rb_call_info) + + ci_kw_size * sizeof(struct rb_call_info_with_kwarg); + unsigned int i; + + dump_ci_entries = (struct rb_call_info *)ALLOCA_N(char, 
byte_size); + dump_ci_kw_entries = (struct rb_call_info_with_kwarg *)&dump_ci_entries[ci_size]; + memcpy(dump_ci_entries, ci_entries, byte_size); + + for (i=0; i<ci_size; i++) { /* convert ID for each ci */ + dump_ci_entries[i].mid = ibf_dump_id(dump, dump_ci_entries[i].mid); + } + for (i=0; i<ci_kw_size; i++) { /* each keyword entry: write keyword_len followed by the keyword object indices, and store that offset in kw_arg */ + const struct rb_call_info_kw_arg *kw_arg = dump_ci_kw_entries[i].kw_arg; + int j; + VALUE *keywords = ALLOCA_N(VALUE, kw_arg->keyword_len); + for (j=0; j<kw_arg->keyword_len; j++) { + keywords[j] = (VALUE)ibf_dump_object(dump, kw_arg->keywords[j]); /* kw_arg->keywords[n] is Symbol */ + } + dump_ci_kw_entries[i].kw_arg = (struct rb_call_info_kw_arg *)(VALUE)ibf_dump_write(dump, &kw_arg->keyword_len, sizeof(int)); + ibf_dump_write(dump, keywords, sizeof(VALUE) * kw_arg->keyword_len); + + dump_ci_kw_entries[i].ci.mid = ibf_dump_id(dump, dump_ci_kw_entries[i].ci.mid); + } + return (struct rb_call_info *)(VALUE)ibf_dump_write(dump, dump_ci_entries, byte_size); +} + /* Inverse of ibf_dump_ci_entries: copies the combined call-info area, resolves method ids, and rebuilds each keyword entry's kw_arg from the dumped length and object indices. */ +static struct rb_call_info * +ibf_load_ci_entries(const struct ibf_load *load, const struct rb_iseq_constant_body *body) +{ + unsigned int i; + const unsigned int ci_size = body->ci_size; + const unsigned int ci_kw_size = body->ci_kw_size; + struct rb_call_info *ci_entries = ibf_load_alloc(load, IBF_OFFSET(body->ci_entries), + sizeof(struct rb_call_info) * body->ci_size + + sizeof(struct rb_call_info_with_kwarg) * body->ci_kw_size); + struct rb_call_info_with_kwarg *ci_kw_entries = (struct rb_call_info_with_kwarg *)&ci_entries[ci_size]; + + for (i=0; i<ci_size; i++) { + ci_entries[i].mid = ibf_load_id(load, ci_entries[i].mid); + } + for (i=0; i<ci_kw_size; i++) { + int j; + ibf_offset_t kw_arg_offset = IBF_OFFSET(ci_kw_entries[i].kw_arg); + const int keyword_len = *(int *)(load->buff + kw_arg_offset); + const VALUE *keywords = (VALUE *)(load->buff + kw_arg_offset + sizeof(int)); /* NOTE(review): VALUEs are read at an offset that follows an int, so they may not be VALUE-aligned */ + struct rb_call_info_kw_arg *kw_arg = ruby_xmalloc(sizeof(struct rb_call_info_kw_arg) + sizeof(VALUE) * (keyword_len - 
1)); + kw_arg->keyword_len = keyword_len; + for (j=0; j<kw_arg->keyword_len; j++) { + kw_arg->keywords[j] = (VALUE)ibf_load_object(load, keywords[j]); + } + ci_kw_entries[i].kw_arg = kw_arg; + ci_kw_entries[i].ci.mid = ibf_load_id(load, ci_kw_entries[i].ci.mid); + } + + return ci_entries; +} + /* Dumps one iseq: a copy of its constant body is filled with object indices and with the offsets of the separately written tables, and is written last. Returns the offset of that body record. is_entries and cc_entries are not dumped (NULL); mark_ary carries ISEQ_FLIP_CNT(iseq). */ +static ibf_offset_t +ibf_dump_iseq_each(struct ibf_dump *dump, const rb_iseq_t *iseq) +{ + struct rb_iseq_constant_body dump_body; + dump_body = *iseq->body; + + dump_body.location.path = ibf_dump_object(dump, dump_body.location.path); + dump_body.location.absolute_path = ibf_dump_object(dump, dump_body.location.absolute_path); + dump_body.location.base_label = ibf_dump_object(dump, dump_body.location.base_label); + dump_body.location.label = ibf_dump_object(dump, dump_body.location.label); + + dump_body.iseq_encoded = ibf_dump_code(dump, iseq); + dump_body.param.opt_table = ibf_dump_param_opt_table(dump, iseq); + dump_body.param.keyword = ibf_dump_param_keyword(dump, iseq); + dump_body.line_info_table = ibf_dump_line_info_table(dump, iseq); + dump_body.local_table = ibf_dump_local_table(dump, iseq); + dump_body.catch_table = ibf_dump_catch_table(dump, iseq); + dump_body.parent_iseq = ibf_dump_iseq(dump, iseq->body->parent_iseq); + dump_body.local_iseq = ibf_dump_iseq(dump, iseq->body->local_iseq); + dump_body.is_entries = NULL; + dump_body.ci_entries = ibf_dump_ci_entries(dump, iseq); + dump_body.cc_entries = NULL; + dump_body.mark_ary = ISEQ_FLIP_CNT(iseq); + + return ibf_dump_write(dump, &dump_body, sizeof(dump_body)); +} + /* Loads the object at str_index and, unless it is nil, returns its rb_fstring() form. */ +static VALUE +ibf_load_location_str(const struct ibf_load *load, VALUE str_index) +{ + VALUE str = ibf_load_object(load, str_index); + if (str != Qnil) { + str = rb_fstring(str); + } + return str; +} + /* Rebuilds iseq->body from the body record found at offset in the load buffer. */ +static void +ibf_load_iseq_each(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t offset) +{ + struct rb_iseq_constant_body *load_body = iseq->body = ZALLOC(struct rb_iseq_constant_body); + const struct rb_iseq_constant_body *body = (struct 
rb_iseq_constant_body *)(load->buff + offset); + + /* memcpy(load_body, load->buff + offset, sizeof(*load_body)); */ + /* scalar fields are copied as they are */ + load_body->type = body->type; + load_body->stack_max = body->stack_max; + load_body->local_size = body->local_size; + load_body->iseq_size = body->iseq_size; + load_body->param = body->param; + load_body->local_table_size = body->local_table_size; + load_body->is_size = body->is_size; + load_body->ci_size = body->ci_size; + load_body->ci_kw_size = body->ci_kw_size; + load_body->line_info_size = body->line_info_size; + + RB_OBJ_WRITE(iseq, &load_body->mark_ary, iseq_mark_ary_create((int)body->mark_ary)); /* the dumped mark_ary field holds the flip count, not an array */ + + RB_OBJ_WRITE(iseq, &load_body->location.path, ibf_load_location_str(load, body->location.path)); + RB_OBJ_WRITE(iseq, &load_body->location.absolute_path, ibf_load_location_str(load, body->location.absolute_path)); + RB_OBJ_WRITE(iseq, &load_body->location.base_label, ibf_load_location_str(load, body->location.base_label)); + RB_OBJ_WRITE(iseq, &load_body->location.label, ibf_load_location_str(load, body->location.label)); + load_body->location.first_lineno = body->location.first_lineno; + + /* entry arrays and tables are installed before the code is loaded, because ibf_load_code() takes addresses inside is_entries, ci_entries and cc_entries */ + load_body->is_entries = ZALLOC_N(union iseq_inline_storage_entry, body->is_size); + load_body->ci_entries = ibf_load_ci_entries(load, body); + load_body->cc_entries = ZALLOC_N(struct rb_call_cache, body->ci_size + body->ci_kw_size); + load_body->param.opt_table = ibf_load_param_opt_table(load, body); + load_body->param.keyword = ibf_load_param_keyword(load, body); + load_body->line_info_table = ibf_load_line_info_table(load, body); + load_body->local_table = ibf_load_local_table(load, body); + load_body->catch_table = ibf_load_catch_table(load, body); + load_body->parent_iseq = ibf_load_iseq(load, body->parent_iseq); + load_body->local_iseq = ibf_load_iseq(load, body->local_iseq); + + load_body->iseq_encoded = ibf_load_code(load, iseq, body); + + rb_iseq_translate_threaded_code(iseq); +} + + /* Writes the list of iseq offsets collected in dump->iseq_list and records its offset and length in header. */ +static void +ibf_dump_iseq_list(struct ibf_dump *dump, struct 
ibf_header *header) +{ + const long size = RARRAY_LEN(dump->iseq_list); + ibf_offset_t *list = ALLOCA_N(ibf_offset_t, size); + long i; + + for (i=0; i<size; i++) { + list[i] = (ibf_offset_t)NUM2LONG(rb_ary_entry(dump->iseq_list, i)); + } + + header->iseq_list_offset = ibf_dump_write(dump, list, sizeof(ibf_offset_t) * size); + header->iseq_list_size = (unsigned int)size; +} + /* Argument block passed through st_foreach() by ibf_dump_id_list(). */ +struct ibf_dump_id_list_i_arg { + struct ibf_dump *dump; + long *list; + int current_i; +}; + /* st_foreach callback: stores, at the id's index, the object index of the id's name string, or 0 when the id has no name. The assert expects entries to be visited in index order. */ +static int +ibf_dump_id_list_i(st_data_t key, st_data_t val, st_data_t ptr) +{ + struct ibf_dump_id_list_i_arg *arg = (struct ibf_dump_id_list_i_arg *)ptr; + int i = (int)val; + ID id = (ID)key; + assert(arg->current_i == i); + arg->current_i++; + + if (rb_id2name(id)) { + arg->list[i] = (long)ibf_dump_object(arg->dump, rb_id2str(id)); + } + else { + arg->list[i] = 0; + } + + return ST_CONTINUE; +} + /* Writes one long per known id (see ibf_dump_id_list_i) and records the list's offset and length in header. */ +static void +ibf_dump_id_list(struct ibf_dump *dump, struct ibf_header *header) +{ + const long size = dump->id_table->num_entries; + struct ibf_dump_id_list_i_arg arg; + arg.list = ALLOCA_N(long, size); + arg.dump = dump; + arg.current_i = 0; + + st_foreach(dump->id_table, ibf_dump_id_list_i, (st_data_t)&arg); + + header->id_list_offset = ibf_dump_write(dump, arg.list, sizeof(long) * size); + header->id_list_size = (unsigned int)size; +} + +#define IBF_OBJECT_INTERNAL FL_PROMOTED0 + +/* + * Binary format + * - ibf_object_header + * - ibf_object_xxx (xxx is type) + */ + +struct ibf_object_header { + unsigned int type: 5; + unsigned int special_const: 1; + unsigned int frozen: 1; + unsigned int internal: 1; +}; + /* Classes that can be dumped; see ibf_dump_object_class(). */ +enum ibf_object_class_index { + IBF_OBJECT_CLASS_OBJECT, + IBF_OBJECT_CLASS_ARRAY, + IBF_OBJECT_CLASS_STANDARD_ERROR +}; + /* The structs below describe the body that follows an ibf_object_header; a trailing [1] array marks variable-length data. */ +struct ibf_object_string { + long encindex; + long len; + char ptr[1]; +}; + +struct ibf_object_regexp { + long srcstr; + char option; +}; + +struct ibf_object_array { + long len; + long ary[1]; +}; + +struct ibf_object_hash { + long len; + long keyval[1]; +}; + +struct 
ibf_object_struct_range { + long class_index; + long len; + long beg; + long end; + int excl; +}; + +struct ibf_object_bignum { + ssize_t slen; + BDIGIT digits[1]; +}; + +enum ibf_object_data_type { + IBF_OBJECT_DATA_ENCODING +}; + +struct ibf_object_complex_rational { + long a, b; +}; + +struct ibf_object_symbol { + long str; +}; + +/* Both macros expect a variable named load in scope; IBF_OBJBODY skips the object header that precedes every body. */ +#define IBF_OBJHEADER(offset) (struct ibf_object_header *)(load->buff + (offset)) +#define IBF_OBJBODY(type, offset) (type *)(load->buff + sizeof(struct ibf_object_header) + (offset)) + +/* Dump handler for object types that cannot be serialized: prints the object and aborts with rb_bug(). */ +static void +ibf_dump_object_unsupported(struct ibf_dump *dump, VALUE obj) +{ + rb_obj_info_dump(obj); + rb_bug("ibf_dump_object_unsupported: unsupported"); +} + +/* Load handler for object types that cannot be deserialized: aborts with rb_bug(). */ +static VALUE +ibf_load_object_unsupported(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + rb_bug("ibf_load_object_unsupported: unsupported"); + return Qnil; +} + +/* Dumps a class as an ibf_object_class_index; only Object, Array and StandardError are supported, anything else is an rb_bug(). */ +static void +ibf_dump_object_class(struct ibf_dump *dump, VALUE obj) +{ + enum ibf_object_class_index cindex; + if (obj == rb_cObject) { + cindex = IBF_OBJECT_CLASS_OBJECT; + } + else if (obj == rb_cArray) { + cindex = IBF_OBJECT_CLASS_ARRAY; + } + else if (obj == rb_eStandardError) { + cindex = IBF_OBJECT_CLASS_STANDARD_ERROR; + } + else { + rb_obj_info_dump(obj); + rb_p(obj); + rb_bug("unsupported class"); + } + ibf_dump_write(dump, &cindex, sizeof(cindex)); +} + +/* Inverse of ibf_dump_object_class: maps the dumped index back to the class; an unknown index is an rb_bug(). */ +static VALUE +ibf_load_object_class(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + enum ibf_object_class_index *cindexp = IBF_OBJBODY(enum ibf_object_class_index, offset); + enum ibf_object_class_index cindex = *cindexp; + + switch (cindex) { + case IBF_OBJECT_CLASS_OBJECT: + return rb_cObject; + case IBF_OBJECT_CLASS_ARRAY: + return rb_cArray; + case IBF_OBJECT_CLASS_STANDARD_ERROR: + return rb_eStandardError; + } + + rb_bug("ibf_load_object_class: unknown class (%d)", (int)cindex); +} + + +/* A Float is dumped as its raw double value. */ +static void +ibf_dump_object_float(struct ibf_dump *dump, VALUE obj) +{ + double dbl = RFLOAT_VALUE(obj); + 
ibf_dump_write(dump, &dbl, sizeof(dbl)); +} + /* Reads the raw double stored by ibf_dump_object_float and boxes it with DBL2NUM. */ +static VALUE +ibf_load_object_float(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + double *dblp = IBF_OBJBODY(double, offset); + return DBL2NUM(*dblp); +} + /* Dumps a String as encoding index, byte length and bytes. An encoding index above RUBY_ENCINDEX_BUILTIN_MAX is replaced by RUBY_ENCINDEX_BUILTIN_MAX plus the object index of the encoding's name, so the name can be looked up again on load. */ +static void +ibf_dump_object_string(struct ibf_dump *dump, VALUE obj) +{ + long encindex = (long)rb_enc_get_index(obj); + long len = RSTRING_LEN(obj); + const char *ptr = RSTRING_PTR(obj); + + if (encindex > RUBY_ENCINDEX_BUILTIN_MAX) { + rb_encoding *enc = rb_enc_from_index((int)encindex); + const char *enc_name = rb_enc_name(enc); + encindex = RUBY_ENCINDEX_BUILTIN_MAX + ibf_dump_object(dump, rb_str_new2(enc_name)); + } + + IBF_WV(encindex); + IBF_WV(len); + IBF_WP(ptr, char, len); +} + /* Inverse of ibf_dump_object_string; the hidden (internal) and frozen states recorded in the object header are re-applied. */ +static VALUE +ibf_load_object_string(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + const struct ibf_object_string *string = IBF_OBJBODY(struct ibf_object_string, offset); + VALUE str = rb_str_new(string->ptr, string->len); + int encindex = (int)string->encindex; + + if (encindex > RUBY_ENCINDEX_BUILTIN_MAX) { + VALUE enc_name_str = ibf_load_object(load, encindex - RUBY_ENCINDEX_BUILTIN_MAX); + encindex = rb_enc_find_index(RSTRING_PTR(enc_name_str)); + } + rb_enc_associate_index(str, encindex); + + if (header->internal) rb_obj_hide(str); + if (header->frozen) rb_obj_freeze(str); + + return str; +} + /* Dumps a Regexp as the object index of its source string plus its options; srcstr first holds the source VALUE and is then overwritten with that object's index. */ +static void +ibf_dump_object_regexp(struct ibf_dump *dump, VALUE obj) +{ + struct ibf_object_regexp regexp; + regexp.srcstr = RREGEXP_SRC(obj); + regexp.option = (char)rb_reg_options(obj); + regexp.srcstr = (long)ibf_dump_object(dump, regexp.srcstr); + IBF_WV(regexp); +} + /* Recompiles the Regexp from its dumped source string and options. */ +static VALUE +ibf_load_object_regexp(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + const struct ibf_object_regexp *regexp = IBF_OBJBODY(struct ibf_object_regexp, offset); + VALUE srcstr = ibf_load_object(load, regexp->srcstr); + VALUE reg = rb_reg_compile(srcstr, 
(int)regexp->option, NULL, 0); + + if (header->internal) rb_obj_hide(reg); + if (header->frozen) rb_obj_freeze(reg); + + return reg; +} + +/* Dumps an Array as its length followed by the object index of every element. The length is kept as a long (no narrowing cast) so it matches the long len field read on load. */ +static void +ibf_dump_object_array(struct ibf_dump *dump, VALUE obj) +{ + long i, len = RARRAY_LEN(obj); + IBF_WV(len); + for (i=0; i<len; i++) { + long index = (long)ibf_dump_object(dump, RARRAY_AREF(obj, i)); + IBF_WV(index); + } +} + +/* Rebuilds an Array by loading each dumped element index in order, then re-applies the hidden/frozen state from the object header. */ +static VALUE +ibf_load_object_array(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + const struct ibf_object_array *array = IBF_OBJBODY(struct ibf_object_array, offset); + VALUE ary = rb_ary_new_capa(array->len); + long i; /* same type as array->len */ + + for (i=0; i<array->len; i++) { + rb_ary_push(ary, ibf_load_object(load, array->ary[i])); + } + + if (header->internal) rb_obj_hide(ary); + if (header->frozen) rb_obj_freeze(ary); + + return ary; +} + +/* st_foreach callback of ibf_dump_object_hash: writes the object indices of one key/value pair. */ +static int +ibf_dump_object_hash_i(st_data_t key, st_data_t val, st_data_t ptr) +{ + struct ibf_dump *dump = (struct ibf_dump *)ptr; + long key_index = (long)ibf_dump_object(dump, (VALUE)key); + long val_index = (long)ibf_dump_object(dump, (VALUE)val); + IBF_WV(key_index); + IBF_WV(val_index); + return ST_CONTINUE; +} + +/* Dumps a Hash as its size followed by key/value index pairs; the table is only walked when the hash is non-empty. */ +static void +ibf_dump_object_hash(struct ibf_dump *dump, VALUE obj) +{ + long len = RHASH_SIZE(obj); + IBF_WV(len); + if (len > 0) st_foreach(RHASH(obj)->ntbl, ibf_dump_object_hash_i, (st_data_t)dump); +} + +/* Rebuilds a Hash from the dumped key/value index pairs, then re-applies the hidden/frozen state from the object header. */ +static VALUE +ibf_load_object_hash(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + const struct ibf_object_hash *hash = IBF_OBJBODY(struct ibf_object_hash, offset); + VALUE obj = rb_hash_new(); + long i; /* same type as hash->len */ + + for (i=0; i<hash->len; i++) { + VALUE key = ibf_load_object(load, hash->keyval[i*2 ]); + VALUE val = ibf_load_object(load, hash->keyval[i*2+1]); + rb_hash_aset(obj, key, val); + } + + if (header->internal) rb_obj_hide(obj); + if (header->frozen) rb_obj_freeze(obj); + + return obj; +} + +static void +ibf_dump_object_struct(struct ibf_dump *dump, VALUE obj) 
+{ + /* Only Range is supported among T_STRUCT objects; it is dumped as the object indices of its two ends plus the exclusive flag. */ + if (rb_obj_is_kind_of(obj, rb_cRange)) { + struct ibf_object_struct_range range; + VALUE beg, end; + memset(&range, 0, sizeof(range)); /* the whole struct is written out, so padding bytes must not be left uninitialised */ + range.len = 3; + range.class_index = 0; + + rb_range_values(obj, &beg, &end, &range.excl); + range.beg = (long)ibf_dump_object(dump, beg); + range.end = (long)ibf_dump_object(dump, end); + + IBF_WV(range); + } + else { + rb_bug("ibf_dump_object_struct: unsupported class"); + } +} + +/* Rebuilds a Range from the dumped end-point indices and exclusive flag, then re-applies the hidden/frozen state from the object header. */ +static VALUE +ibf_load_object_struct(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + const struct ibf_object_struct_range *range = IBF_OBJBODY(struct ibf_object_struct_range, offset); + VALUE beg = ibf_load_object(load, range->beg); + VALUE end = ibf_load_object(load, range->end); + VALUE obj = rb_range_new(beg, end, range->excl); + if (header->internal) rb_obj_hide(obj); + if (header->frozen) rb_obj_freeze(obj); + return obj; +} + +/* Dumps a Bignum as a signed digit count (negative for negative numbers) followed by the raw BDIGIT array. */ +static void +ibf_dump_object_bignum(struct ibf_dump *dump, VALUE obj) +{ + ssize_t len = BIGNUM_LEN(obj); + ssize_t slen = BIGNUM_SIGN(obj) > 0 ? len : len * -1; + BDIGIT *d = BIGNUM_DIGITS(obj); + + IBF_WV(slen); + IBF_WP(d, BDIGIT, len); +} + +/* + * Rebuilds a Bignum from the dumped digits. The digits were written as len + * native BDIGITs, least significant first, so they are unpacked as len words + * of sizeof(BDIGIT) bytes in native byte order; this holds for any BDIGIT + * width and endianness rather than only for a 4-byte little-endian BDIGIT. + */ +static VALUE +ibf_load_object_bignum(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + const struct ibf_object_bignum *bignum = IBF_OBJBODY(struct ibf_object_bignum, offset); + int sign = bignum->slen > 0; + ssize_t len = sign > 0 ? bignum->slen : -1 * bignum->slen; + VALUE obj = rb_integer_unpack(bignum->digits, len, sizeof(BDIGIT), 0, + INTEGER_PACK_LSWORD_FIRST | INTEGER_PACK_NATIVE_BYTE_ORDER | (sign == 0 ? 
INTEGER_PACK_NEGATIVE : 0)); + if (header->internal) rb_obj_hide(obj); + if (header->frozen) rb_obj_freeze(obj); + return obj; +} + +static void +ibf_dump_object_data(struct ibf_dump *dump, VALUE obj) +{ + if (rb_data_is_encoding(obj)) { + rb_encoding *enc = rb_to_encoding(obj); + const char *name = rb_enc_name(enc); + enum ibf_object_data_type type = IBF_OBJECT_DATA_ENCODING; + long len = strlen(name) + 1; + IBF_WV(type); + IBF_WV(len); + IBF_WP(name, char, strlen(name) + 1); + } + else { + ibf_dump_object_unsupported(dump, obj); + } +} + +static VALUE +ibf_load_object_data(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + const enum ibf_object_data_type *typep = IBF_OBJBODY(enum ibf_object_data_type, offset); + /* const long *lenp = IBF_OBJBODY(long, offset + sizeof(enum ibf_object_data_type)); */ + const char *data = IBF_OBJBODY(char, offset + sizeof(enum ibf_object_data_type) + sizeof(long)); + + switch (*typep) { + case IBF_OBJECT_DATA_ENCODING: + { + VALUE encobj = rb_enc_from_encoding(rb_enc_find(data)); + return encobj; + } + } + + return ibf_load_object_unsupported(load, header, offset); +} + +static void +ibf_dump_object_complex_rational(struct ibf_dump *dump, VALUE obj) +{ + long real = (long)ibf_dump_object(dump, RCOMPLEX(obj)->real); + long imag = (long)ibf_dump_object(dump, RCOMPLEX(obj)->imag); + + IBF_WV(real); + IBF_WV(imag); +} + +static VALUE +ibf_load_object_complex_rational(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + const struct ibf_object_complex_rational *nums = IBF_OBJBODY(struct ibf_object_complex_rational, offset); + VALUE a = ibf_load_object(load, nums->a); + VALUE b = ibf_load_object(load, nums->b); + VALUE obj = header->type == T_COMPLEX ? 
+ rb_complex_new(a, b) : rb_rational_new(a, b); + + if (header->internal) rb_obj_hide(obj); + if (header->frozen) rb_obj_freeze(obj); + return obj; +} + +static void +ibf_dump_object_symbol(struct ibf_dump *dump, VALUE obj) +{ + VALUE str = rb_sym2str(obj); + long str_index = (long)ibf_dump_object(dump, str); + IBF_WV(str_index); +} + +static VALUE +ibf_load_object_symbol(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset) +{ + /* const struct ibf_object_header *header = IBF_OBJHEADER(offset); */ + const struct ibf_object_symbol *symbol = IBF_OBJBODY(struct ibf_object_symbol, offset); + VALUE str = ibf_load_object(load, symbol->str); + ID id = rb_intern_str(str); + return ID2SYM(id); +} + +typedef void (*ibf_dump_object_function)(struct ibf_dump *dump, VALUE obj); +static ibf_dump_object_function dump_object_functions[RUBY_T_MASK+1] = { + ibf_dump_object_unsupported, /* T_NONE */ + ibf_dump_object_unsupported, /* T_OBJECT */ + ibf_dump_object_class, /* T_CLASS */ + ibf_dump_object_unsupported, /* T_MODULE */ + ibf_dump_object_float, /* T_FLOAT */ + ibf_dump_object_string, /* T_STRING */ + ibf_dump_object_regexp, /* T_REGEXP */ + ibf_dump_object_array, /* T_ARRAY */ + ibf_dump_object_hash, /* T_HASH */ + ibf_dump_object_struct, /* T_STRUCT */ + ibf_dump_object_bignum, /* T_BIGNUM */ + ibf_dump_object_unsupported, /* T_FILE */ + ibf_dump_object_data, /* T_DATA */ + ibf_dump_object_unsupported, /* T_MATCH */ + ibf_dump_object_complex_rational, /* T_COMPLEX */ + ibf_dump_object_complex_rational, /* T_RATIONAL */ + ibf_dump_object_unsupported, /* 0x10 */ + ibf_dump_object_unsupported, /* 0x11 T_NIL */ + ibf_dump_object_unsupported, /* 0x12 T_TRUE */ + ibf_dump_object_unsupported, /* 0x13 T_FALSE */ + ibf_dump_object_symbol, /* 0x14 T_SYMBOL */ + ibf_dump_object_unsupported, /* T_FIXNUM */ + ibf_dump_object_unsupported, /* T_UNDEF */ + ibf_dump_object_unsupported, /* 0x17 */ + ibf_dump_object_unsupported, /* 0x18 */ + 
ibf_dump_object_unsupported, /* 0x19 */ + ibf_dump_object_unsupported, /* T_IMEMO 0x1a */ + ibf_dump_object_unsupported, /* T_NODE 0x1b */ + ibf_dump_object_unsupported, /* T_ICLASS 0x1c */ + ibf_dump_object_unsupported, /* T_ZOMBIE 0x1d */ + ibf_dump_object_unsupported, /* 0x1e */ + ibf_dump_object_unsupported /* 0x1f */ +}; + +static ibf_offset_t +lbf_dump_object_object(struct ibf_dump *dump, VALUE obj) +{ + struct ibf_object_header obj_header; + ibf_offset_t current_offset = ibf_dump_pos(dump); + obj_header.type = TYPE(obj); + + if (SPECIAL_CONST_P(obj)) { + if (RB_TYPE_P(obj, T_SYMBOL) || + RB_TYPE_P(obj, T_FLOAT)) { + obj_header.internal = FALSE; + goto dump_object; + } + obj_header.special_const = TRUE; + obj_header.frozen = TRUE; + obj_header.internal = TRUE; + IBF_WV(obj_header); + IBF_WV(obj); + } + else { + obj_header.internal = (RBASIC_CLASS(obj) == 0) ? TRUE : FALSE; + dump_object: + obj_header.special_const = FALSE; + obj_header.frozen = FL_TEST(obj, FL_FREEZE) ? TRUE : FALSE; + IBF_WV(obj_header); + (*dump_object_functions[obj_header.type])(dump, obj); + } + + return current_offset; +} + +typedef VALUE (*ibf_load_object_function)(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t); +static ibf_load_object_function load_object_functions[RUBY_T_MASK+1] = { + ibf_load_object_unsupported, /* T_NONE */ + ibf_load_object_unsupported, /* T_OBJECT */ + ibf_load_object_class, /* T_CLASS */ + ibf_load_object_unsupported, /* T_MODULE */ + ibf_load_object_float, /* T_FLOAT */ + ibf_load_object_string, /* T_STRING */ + ibf_load_object_regexp, /* T_REGEXP */ + ibf_load_object_array, /* T_ARRAY */ + ibf_load_object_hash, /* T_HASH */ + ibf_load_object_struct, /* T_STRUCT */ + ibf_load_object_bignum, /* T_BIGNUM */ + ibf_load_object_unsupported, /* T_FILE */ + ibf_load_object_data, /* T_DATA */ + ibf_load_object_unsupported, /* T_MATCH */ + ibf_load_object_complex_rational, /* T_COMPLEX */ + ibf_load_object_complex_rational, /* 
T_RATIONAL */ + ibf_load_object_unsupported, /* 0x10 */ + ibf_load_object_unsupported, /* T_NIL */ + ibf_load_object_unsupported, /* T_TRUE */ + ibf_load_object_unsupported, /* T_FALSE */ + ibf_load_object_symbol, + ibf_load_object_unsupported, /* T_FIXNUM */ + ibf_load_object_unsupported, /* T_UNDEF */ + ibf_load_object_unsupported, /* 0x17 */ + ibf_load_object_unsupported, /* 0x18 */ + ibf_load_object_unsupported, /* 0x19 */ + ibf_load_object_unsupported, /* T_IMEMO 0x1a */ + ibf_load_object_unsupported, /* T_NODE 0x1b */ + ibf_load_object_unsupported, /* T_ICLASS 0x1c */ + ibf_load_object_unsupported, /* T_ZOMBIE 0x1d */ + ibf_load_object_unsupported, /* 0x1e */ + ibf_load_object_unsupported /* 0x1f */ +}; + +static VALUE +ibf_load_object(const struct ibf_load *load, VALUE object_index) +{ + if (object_index == 0) { + return Qnil; + } + else { + VALUE obj = rb_ary_entry(load->obj_list, (long)object_index); + if (obj == Qnil) { /* TODO: avoid multiple Qnil load */ + ibf_offset_t *offsets = (ibf_offset_t *)(load->header->object_list_offset + load->buff); + ibf_offset_t offset = offsets[object_index]; + const struct ibf_object_header *header = IBF_OBJHEADER(offset); + + if (header->special_const) { + VALUE *vp = IBF_OBJBODY(VALUE, offset); + obj = *vp; + } + else { + obj = (*load_object_functions[header->type])(load, header, offset); + } + + rb_ary_store(load->obj_list, (long)object_index, obj); + } + iseq_add_mark_object(load->iseq, obj); + return obj; + } +} + +static void +ibf_dump_object_list(struct ibf_dump *dump, struct ibf_header *header) +{ + VALUE list = rb_ary_tmp_new(RARRAY_LEN(dump->obj_list)); + int i, size; + + for (i=0; i<RARRAY_LEN(dump->obj_list); i++) { + VALUE obj = RARRAY_AREF(dump->obj_list, i); + ibf_offset_t offset = lbf_dump_object_object(dump, obj); + rb_ary_push(list, UINT2NUM(offset)); + } + size = i; + header->object_list_offset = ibf_dump_pos(dump); + + for (i=0; i<size; i++) { + ibf_offset_t offset = NUM2UINT(RARRAY_AREF(list, i)); + 
IBF_WV(offset); + } + + header->object_list_size = size; +} + +VALUE +iseq_ibf_dump(const rb_iseq_t *iseq, VALUE opt) +{ + struct ibf_dump dump; + struct ibf_header header; + + dump.str = rb_str_new(0, 0); + dump.iseq_list = rb_ary_tmp_new(0); + dump.obj_list = rb_ary_tmp_new(1); rb_ary_push(dump.obj_list, Qnil); /* 0th is nil */ + dump.iseq_table = st_init_numtable(); /* need free */ + dump.id_table = st_init_numtable(); /* need free */ + + ibf_table_index(dump.id_table, 0); /* id_index:0 is 0 */ + + if (iseq->body->parent_iseq != NULL || + iseq->body->local_iseq != iseq) { + rb_raise(rb_eRuntimeError, "should be top of iseq"); + } + if (RTEST(ISEQ_COVERAGE(iseq))) { + rb_raise(rb_eRuntimeError, "should not compile with coverage"); + } + + ibf_dump_write(&dump, &header, sizeof(header)); + ibf_dump_write(&dump, RUBY_PLATFORM, strlen(RUBY_PLATFORM) + 1); + ibf_dump_iseq(&dump, iseq); + + header.magic[0] = 'Y'; /* YARB */ + header.magic[1] = 'A'; + header.magic[2] = 'R'; + header.magic[3] = 'B'; + header.major_version = ISEQ_MAJOR_VERSION; + header.minor_version = ISEQ_MINOR_VERSION; + ibf_dump_iseq_list(&dump, &header); + ibf_dump_id_list(&dump, &header); + ibf_dump_object_list(&dump, &header); + header.size = ibf_dump_pos(&dump); + + if (RTEST(opt)) { + VALUE opt_str = rb_str_to_str(opt); /* raises TypeError for a non-String opt; rb_check_string_type() would hand back nil here */ + header.extra_size = RSTRING_LEN(opt_str) + 1; + ibf_dump_write(&dump, RSTRING_PTR(opt_str), header.extra_size); /* extra data is appended right after the header.size bytes */ + } + else { + header.extra_size = 0; + } + + ibf_dump_overwrite(&dump, &header, sizeof(header), 0); + + /* release. TODO: no need to care exceptions? 
*/ + st_free_table(dump.iseq_table); + st_free_table(dump.id_table); + return dump.str; +} + +static const ibf_offset_t * +ibf_iseq_list(const struct ibf_load *load) +{ + return (ibf_offset_t *)(load->buff + load->header->iseq_list_offset); +} + +void +ibf_load_iseq_complete(rb_iseq_t *iseq) +{ + struct ibf_load *load = RTYPEDDATA_DATA(iseq->aux.loader.obj); + rb_iseq_t *prev_src_iseq = load->iseq; + load->iseq = iseq; + ibf_load_iseq_each(load, iseq, ibf_iseq_list(load)[iseq->aux.loader.index]); + ISEQ_COMPILE_DATA(iseq) = NULL; + FL_UNSET(iseq, ISEQ_NOT_LOADED_YET); + load->iseq = prev_src_iseq; +} + +#if USE_LAZY_LOAD +const rb_iseq_t * +rb_iseq_complete(const rb_iseq_t *iseq) +{ + ibf_load_iseq_complete((rb_iseq_t *)iseq); + return iseq; +} +#endif + +static rb_iseq_t * +ibf_load_iseq(const struct ibf_load *load, const rb_iseq_t *index_iseq) +{ + int iseq_index = (int)(VALUE)index_iseq; + + if (iseq_index == -1) { + return NULL; + } + else { + VALUE iseqv = rb_ary_entry(load->iseq_list, iseq_index); + + if (iseqv != Qnil) { + return (rb_iseq_t *)iseqv; + } + else { + rb_iseq_t *iseq = iseq_imemo_alloc(); + FL_SET(iseq, ISEQ_NOT_LOADED_YET); + iseq->aux.loader.obj = load->loader_obj; + iseq->aux.loader.index = iseq_index; + rb_ary_store(load->iseq_list, iseq_index, (VALUE)iseq); + +#if !USE_LAZY_LOAD + ibf_load_iseq_complete(iseq); +#endif /* !USE_LAZY_LOAD */ + + if (load->iseq) { + iseq_add_mark_object(load->iseq, (VALUE)iseq); + } + return iseq; + } + } +} + +static void +ibf_setup_load(struct ibf_load *load, VALUE loader_obj, VALUE str) +{ + RB_OBJ_WRITE(loader_obj, &load->str, str); + load->loader_obj = loader_obj; + load->buff = StringValuePtr(str); + load->header = (struct ibf_header *)load->buff; + RB_OBJ_WRITE(loader_obj, &load->iseq_list, rb_ary_tmp_new(0)); + RB_OBJ_WRITE(loader_obj, &load->obj_list, rb_ary_tmp_new(0)); + load->id_list = ZALLOC_N(ID, load->header->id_list_size); + load->iseq = NULL; +} + +static void +ibf_loader_mark(void *ptr) +{ + 
if (ptr) { + struct ibf_load *load = (struct ibf_load *)ptr; + rb_gc_mark(load->str); + rb_gc_mark(load->iseq_list); + rb_gc_mark(load->obj_list); + } +} + +static void +ibf_loader_free(void *ptr) +{ + if (ptr) { + struct ibf_load *load = (struct ibf_load *)ptr; + ruby_xfree(load->id_list); + ruby_xfree(load); + } +} + +static size_t +ibf_loader_memsize(const void *ptr) +{ + if (ptr) { + struct ibf_load *load = (struct ibf_load *)ptr; + return sizeof(struct ibf_load) + load->header->id_list_size * sizeof(ID); + } + else { + return 0; + } +} + +static const rb_data_type_t ibf_load_type = { + "ibf_loader", + {ibf_loader_mark, ibf_loader_free, ibf_loader_memsize,}, + 0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY +}; + +const rb_iseq_t * +iseq_ibf_load(VALUE str) +{ + struct ibf_load *load; + const rb_iseq_t *iseq; + VALUE loader_obj = TypedData_Make_Struct(0, struct ibf_load, &ibf_load_type, load); + + ibf_setup_load(load, loader_obj, str); + iseq = ibf_load_iseq(load, 0); + + RB_GC_GUARD(loader_obj); + return iseq; +} + +VALUE +iseq_ibf_load_extra_data(VALUE str) +{ + struct ibf_load *load; + VALUE loader_obj = TypedData_Make_Struct(0, struct ibf_load, &ibf_load_type, load); + VALUE extra_str; + + ibf_setup_load(load, loader_obj, str); + extra_str = rb_str_new2(load->buff + load->header->size); /* extra data starts at offset header->size (where iseq_ibf_dump appends it); extra_size is only its length */ + RB_GC_GUARD(loader_obj); + return extra_str; +} + diff --git a/encoding.c b/encoding.c index eb777c90f3..b030f21875 100644 --- a/encoding.c +++ b/encoding.c @@ -86,6 +86,12 @@ static const rb_data_type_t encoding_data_type = { #define is_data_encoding(obj) (RTYPEDDATA_P(obj) && RTYPEDDATA_TYPE(obj) == &encoding_data_type) #define is_obj_encoding(obj) (RB_TYPE_P((obj), T_DATA) && is_data_encoding(obj)) +int +rb_data_is_encoding(VALUE obj) +{ + return is_data_encoding(obj); +} + static VALUE enc_new(rb_encoding *encoding) { @@ -928,13 +928,15 @@ defineclass rb_bug("unknown defineclass type: %d", (int)type); } + rb_iseq_check(class_iseq); + /* enter scope */ 
vm_push_frame(th, class_iseq, VM_FRAME_MAGIC_CLASS, klass, VM_ENVVAL_BLOCK_PTR(GET_BLOCK_PTR()), (VALUE)vm_cref_push(th, klass, NULL, FALSE), class_iseq->body->iseq_encoded, GET_SP(), - class_iseq->body->local_size, class_iseq->body->stack_max); - + class_iseq->body->local_size, + class_iseq->body->stack_max); RESTORE_REGS(); NEXT_INSN(); } diff --git a/internal.h b/internal.h index df594c171f..ca630ddbec 100644 --- a/internal.h +++ b/internal.h @@ -725,6 +725,7 @@ void rb_encdb_declare(const char *name); void rb_enc_set_base(const char *name, const char *orig); int rb_enc_set_dummy(int index); void rb_encdb_set_unicode(int index); +int rb_data_is_encoding(VALUE obj); /* enum.c */ VALUE rb_f_send(int argc, VALUE *argv, VALUE recv); @@ -25,9 +25,6 @@ #include "insns.inc" #include "insns_info.inc" -#define ISEQ_MAJOR_VERSION 2 -#define ISEQ_MINOR_VERSION 3 - VALUE rb_cISeq; static VALUE iseqw_new(const rb_iseq_t *iseq); static const rb_iseq_t *iseqw_check(VALUE iseqw); @@ -71,30 +68,32 @@ rb_iseq_free(const rb_iseq_t *iseq) RUBY_FREE_ENTER("iseq"); if (iseq) { - ruby_xfree((void *)iseq->body->iseq_encoded); - ruby_xfree((void *)iseq->body->line_info_table); - ruby_xfree((void *)iseq->body->local_table); - ruby_xfree((void *)iseq->body->is_entries); - - if (iseq->body->ci_entries) { - unsigned int i; - struct rb_call_info_with_kwarg *ci_kw_entries = (struct rb_call_info_with_kwarg *)&iseq->body->ci_entries[iseq->body->ci_size]; - for (i=0; i<iseq->body->ci_kw_size; i++) { - const struct rb_call_info_kw_arg *kw_arg = ci_kw_entries[i].kw_arg; - ruby_xfree((void *)kw_arg); + if (iseq->body) { + ruby_xfree((void *)iseq->body->iseq_encoded); + ruby_xfree((void *)iseq->body->line_info_table); + ruby_xfree((void *)iseq->body->local_table); + ruby_xfree((void *)iseq->body->is_entries); + + if (iseq->body->ci_entries) { + unsigned int i; + struct rb_call_info_with_kwarg *ci_kw_entries = (struct rb_call_info_with_kwarg *)&iseq->body->ci_entries[iseq->body->ci_size]; + for (i=0; 
i<iseq->body->ci_kw_size; i++) { + const struct rb_call_info_kw_arg *kw_arg = ci_kw_entries[i].kw_arg; + ruby_xfree((void *)kw_arg); + } + ruby_xfree(iseq->body->ci_entries); + ruby_xfree(iseq->body->cc_entries); } - ruby_xfree(iseq->body->ci_entries); - ruby_xfree(iseq->body->cc_entries); - } - ruby_xfree((void *)iseq->body->catch_table); - ruby_xfree((void *)iseq->body->param.opt_table); + ruby_xfree((void *)iseq->body->catch_table); + ruby_xfree((void *)iseq->body->param.opt_table); - if (iseq->body->param.keyword != NULL) { - ruby_xfree((void *)iseq->body->param.keyword->default_values); - ruby_xfree((void *)iseq->body->param.keyword); + if (iseq->body->param.keyword != NULL) { + ruby_xfree((void *)iseq->body->param.keyword->default_values); + ruby_xfree((void *)iseq->body->param.keyword); + } + compile_data_free(ISEQ_COMPILE_DATA(iseq)); + ruby_xfree(iseq->body); } - compile_data_free(ISEQ_COMPILE_DATA(iseq)); - ruby_xfree(iseq->body); } RUBY_FREE_LEAVE("iseq"); } @@ -116,9 +115,11 @@ rb_iseq_mark(const rb_iseq_t *iseq) RUBY_MARK_UNLESS_NULL(body->location.absolute_path); } - if (ISEQ_COMPILE_DATA(iseq) != 0) { + if (FL_TEST(iseq, ISEQ_NOT_LOADED_YET)) { + rb_gc_mark(iseq->aux.loader.obj); + } + else if (ISEQ_COMPILE_DATA(iseq) != 0) { const struct iseq_compile_data *const compile_data = ISEQ_COMPILE_DATA(iseq); - RUBY_MARK_UNLESS_NULL(compile_data->mark_ary); RUBY_MARK_UNLESS_NULL(compile_data->err_info); RUBY_MARK_UNLESS_NULL(compile_data->catch_table_ary); @@ -205,7 +206,7 @@ iseq_memsize(const rb_iseq_t *iseq) static rb_iseq_t * iseq_alloc(void) { - rb_iseq_t *iseq = (rb_iseq_t *)rb_imemo_new(imemo_iseq, 0, 0, 0, 0); + rb_iseq_t *iseq = iseq_imemo_alloc(); iseq->body = ZALLOC(struct rb_iseq_constant_body); return iseq; } @@ -260,16 +261,6 @@ rb_iseq_add_mark_object(const rb_iseq_t *iseq, VALUE obj) } static VALUE -iseq_mark_ary_create(int flip_cnt) -{ - VALUE ary = rb_ary_tmp_new(3); - rb_ary_push(ary, Qnil); /* ISEQ_MARK_ARY_COVERAGE */ - rb_ary_push(ary, 
INT2FIX(flip_cnt)); /* ISEQ_MARK_ARY_FLIP_CNT */ - rb_ary_push(ary, Qnil); /* ISEQ_MARK_ARY_ORIGINAL_ISEQ */ - return ary; -} - -static VALUE prepare_iseq_build(rb_iseq_t *iseq, VALUE name, VALUE path, VALUE absolute_path, VALUE first_lineno, const rb_iseq_t *parent, enum iseq_type type, @@ -485,6 +476,19 @@ rb_iseq_new_with_opt(NODE *node, VALUE name, VALUE path, VALUE absolute_path, return iseq_translate(iseq); } +const rb_iseq_t * +rb_iseq_load_iseq(VALUE fname) +{ + if (rb_respond_to(rb_cISeq, rb_intern("load_iseq"))) { + VALUE iseqv = rb_funcall(rb_cISeq, rb_intern("load_iseq"), 1, fname); + if (CLASS_OF(iseqv) == rb_cISeq) { + return iseqw_check(iseqv); + } + } + + return NULL; +} + #define CHECK_ARRAY(v) rb_convert_type((v), T_ARRAY, "Array", "to_ary") #define CHECK_HASH(v) rb_convert_type((v), T_HASH, "Hash", "to_hash") #define CHECK_STRING(v) rb_convert_type((v), T_STRING, "String", "to_str") @@ -583,8 +587,7 @@ static VALUE iseq_s_load(int argc, VALUE *argv, VALUE self) { VALUE data, opt=Qnil; rb_scan_args(argc, argv, "11", &data, &opt); - return iseq_load(data, NULL, opt); } @@ -892,7 +895,11 @@ iseqw_s_compile_option_get(VALUE self) static const rb_iseq_t * iseqw_check(VALUE iseqw) { - const rb_iseq_t *iseq = DATA_PTR(iseqw); + rb_iseq_t *iseq = DATA_PTR(iseqw); + + if (!iseq->body) { + ibf_load_iseq_complete(iseq); + } if (!iseq->body->location.label) { rb_raise(rb_eTypeError, "uninitialized InstructionSequence"); @@ -1235,7 +1242,7 @@ rb_insn_operand_intern(const rb_iseq_t *iseq, { const char *types = insn_op_types(insn); char type = types[op_no]; - VALUE ret; + VALUE ret = Qundef; switch (type) { case TS_OFFSET: /* LONG */ @@ -1281,8 +1288,8 @@ rb_insn_operand_intern(const rb_iseq_t *iseq, case TS_ISEQ: /* iseq */ { - rb_iseq_t *iseq = (rb_iseq_t *)op; - if (iseq) { + if (op) { + const rb_iseq_t *iseq = rb_iseq_check((rb_iseq_t *)op); ret = iseq->body->location.label; if (child) { rb_ary_push(child, 
(VALUE)iseq); @@ -1492,7 +1499,7 @@ rb_iseq_disasm(const rb_iseq_t *iseq) catch_type((int)entry->type), (int)entry->start, (int)entry->end, (int)entry->sp, (int)entry->cont); if (entry->iseq) { - rb_str_concat(str, rb_iseq_disasm(entry->iseq)); + rb_str_concat(str, rb_iseq_disasm(rb_iseq_check(entry->iseq))); } } } @@ -1561,7 +1568,7 @@ rb_iseq_disasm(const rb_iseq_t *iseq) for (l = 0; l < RARRAY_LEN(child); l++) { VALUE isv = rb_ary_entry(child, l); - rb_str_concat(str, rb_iseq_disasm((rb_iseq_t *)isv)); + rb_str_concat(str, rb_iseq_disasm(rb_iseq_check((rb_iseq_t *)isv))); } return str; @@ -1907,7 +1914,7 @@ iseq_data_to_ary(const rb_iseq_t *iseq) { const rb_iseq_t *iseq = (rb_iseq_t *)*seq; if (iseq) { - VALUE val = iseq_data_to_ary(iseq); + VALUE val = iseq_data_to_ary(rb_iseq_check(iseq)); rb_ary_push(ary, val); } else { @@ -2002,7 +2009,7 @@ iseq_data_to_ary(const rb_iseq_t *iseq) const struct iseq_catch_table_entry *entry = &iseq->body->catch_table->entries[i]; rb_ary_push(ary, exception_type2symbol(entry->type)); if (entry->iseq) { - rb_ary_push(ary, iseq_data_to_ary(entry->iseq)); + rb_ary_push(ary, iseq_data_to_ary(rb_iseq_check(entry->iseq))); } else { rb_ary_push(ary, Qnil); @@ -2326,6 +2333,51 @@ rb_iseqw_local_variables(VALUE iseqval) } /* + * call-seq: + * iseq.to_binary_format(extra_data = nil) -> binary str + * + * Returns serialized iseq binary format data as a String object. + * A corresponding iseq object is created by + * RubyVM::InstructionSequence.from_binary_format() method. + * + * String extra_data will be saved with binary data. + * You can access this data with + * RubyVM::InstructionSequence.from_binary_format_extra_data(binary). 
+ */ +static VALUE +iseqw_to_binary_format(int argc, VALUE *argv, VALUE self) +{ + VALUE opt; + rb_scan_args(argc, argv, "01", &opt); + return iseq_ibf_dump(iseqw_check(self), opt); +} + +/* + * call-seq: + * RubyVM::InstructionSequence.from_binary_format(binary) -> iseq + * + * Load an iseq object from binary format String object + * created by RubyVM::InstructionSequence.to_binary_format. + */ +static VALUE +iseqw_s_from_binary_format(VALUE self, VALUE str) +{ + return iseqw_new(iseq_ibf_load(str)); +} + +/* + * call-seq: + * RubyVM::InstructionSequence.from_binary_format_extra_data(binary) -> str + * + * Load extra data embed into binary format String object. + */ +static VALUE +iseqw_s_from_binary_format_extra_data(VALUE self, VALUE str) +{ + return iseq_ibf_load_extra_data(str); +} + +/* * Document-class: RubyVM::InstructionSequence * * The InstructionSequence class represents a compiled sequence of @@ -2356,6 +2408,11 @@ Init_ISeq(void) rb_define_method(rb_cISeq, "to_a", iseqw_to_a, 0); rb_define_method(rb_cISeq, "eval", iseqw_eval, 0); + rb_define_method(rb_cISeq, "to_binary_format", iseqw_to_binary_format, -1); + rb_define_singleton_method(rb_cISeq, "from_binary_format", iseqw_s_from_binary_format, 1); + rb_define_singleton_method(rb_cISeq, "from_binary_format_extra_data", iseqw_s_from_binary_format_extra_data, 1); + + /* location APIs */ rb_define_method(rb_cISeq, "path", iseqw_path, 0); rb_define_method(rb_cISeq, "absolute_path", iseqw_absolute_path, 0); @@ -12,6 +12,9 @@ #ifndef RUBY_ISEQ_H #define RUBY_ISEQ_H 1 +#define ISEQ_MAJOR_VERSION 2 +#define ISEQ_MINOR_VERSION 3 + #ifndef rb_iseq_t typedef struct rb_iseq_struct rb_iseq_t; #define rb_iseq_t rb_iseq_t @@ -29,16 +32,27 @@ enum iseq_mark_ary_index { ISEQ_MARK_ARY_ORIGINAL_ISEQ = 2, }; +static inline VALUE +iseq_mark_ary_create(int flip_cnt) +{ + VALUE ary = rb_ary_tmp_new(3); + rb_ary_push(ary, Qnil); /* ISEQ_MARK_ARY_COVERAGE */ + rb_ary_push(ary, INT2FIX(flip_cnt)); /* ISEQ_MARK_ARY_FLIP_CNT */ + 
rb_ary_push(ary, Qnil); /* ISEQ_MARK_ARY_ORIGINAL_ISEQ */ + return ary; +} + #define ISEQ_MARK_ARY(iseq) (iseq)->body->mark_ary #define ISEQ_COVERAGE(iseq) RARRAY_AREF(ISEQ_MARK_ARY(iseq), ISEQ_MARK_ARY_COVERAGE) #define ISEQ_COVERAGE_SET(iseq, cov) RARRAY_ASET(ISEQ_MARK_ARY(iseq), ISEQ_MARK_ARY_COVERAGE, cov) +#define ISEQ_FLIP_CNT(iseq) FIX2INT(RARRAY_AREF(ISEQ_MARK_ARY(iseq), ISEQ_MARK_ARY_FLIP_CNT)) + static inline int ISEQ_FLIP_CNT_INCREMENT(const rb_iseq_t *iseq) { - VALUE cntv = RARRAY_AREF(ISEQ_MARK_ARY(iseq), ISEQ_MARK_ARY_FLIP_CNT); - int cnt = FIX2INT(cntv); + int cnt = ISEQ_FLIP_CNT(iseq); RARRAY_ASET(ISEQ_MARK_ARY(iseq), ISEQ_MARK_ARY_FLIP_CNT, INT2FIX(cnt+1)); return cnt; } @@ -59,7 +73,20 @@ ISEQ_ORIGINAL_ISEQ_ALLOC(const rb_iseq_t *iseq, long size) return (VALUE *)RSTRING_PTR(str); } -#define ISEQ_COMPILE_DATA(iseq) (iseq)->compile_data_ +#define ISEQ_COMPILE_DATA(iseq) (iseq)->aux.compile_data + +static inline rb_iseq_t * +iseq_imemo_alloc(void) +{ + return (rb_iseq_t *)rb_imemo_new(imemo_iseq, 0, 0, 0, 0); +} + +#define ISEQ_NOT_LOADED_YET IMEMO_FL_USER1 + +VALUE iseq_ibf_dump(const rb_iseq_t *iseq, VALUE opt); +void ibf_load_iseq_complete(rb_iseq_t *iseq); +const rb_iseq_t *iseq_ibf_load(VALUE str); +VALUE iseq_ibf_load_extra_data(VALUE str); RUBY_SYMBOL_EXPORT_BEGIN @@ -575,6 +575,7 @@ rb_provide(const char *feature) } NORETURN(static void load_failed(VALUE)); +const rb_iseq_t *rb_iseq_load_iseq(VALUE fname); static int rb_load_internal0(rb_thread_t *th, VALUE fname, int wrap) @@ -604,12 +605,17 @@ rb_load_internal0(rb_thread_t *th, VALUE fname, int wrap) state = EXEC_TAG(); if (state == 0) { NODE *node; - rb_iseq_t *iseq; + const rb_iseq_t *iseq; - th->mild_compile_error++; - node = (NODE *)rb_load_file_str(fname); - iseq = rb_iseq_new_top(node, rb_str_new2("<top (required)>"), fname, rb_realpath_internal(Qnil, fname, 1), NULL); - th->mild_compile_error--; + if ((iseq = rb_iseq_load_iseq(fname)) != NULL) { + /* OK */ + } + else { + 
th->mild_compile_error++; + node = (NODE *)rb_load_file_str(fname); + iseq = rb_iseq_new_top(node, rb_str_new2("<top (required)>"), fname, rb_realpath_internal(Qnil, fname, 1), NULL); + th->mild_compile_error--; + } rb_iseq_eval(iseq); } TH_POP_TAG(); @@ -984,12 +984,15 @@ rb_proc_get_iseq(VALUE self, int *is_proc) iseq = rb_method_iseq((VALUE)ifunc->data); if (is_proc) *is_proc = 0; } + return iseq; } else if (SYMBOL_P(iseq)) { self = rb_sym_to_proc((VALUE)iseq); goto again; } - return iseq; + else { + return rb_iseq_check(iseq); + } } static VALUE @@ -998,6 +1001,7 @@ iseq_location(const rb_iseq_t *iseq) VALUE loc[2]; if (!iseq) return Qnil; + rb_iseq_check(iseq); loc[0] = iseq->body->location.path; if (iseq->body->line_info_table) { loc[1] = rb_iseq_first_lineno(iseq); @@ -1142,7 +1146,7 @@ proc_to_s(VALUE self) iseq = proc->block.iseq; is_lambda = proc->is_lambda ? " (lambda)" : ""; - if (RUBY_VM_NORMAL_ISEQ_P(iseq)) { + if (RUBY_VM_NORMAL_ISEQ_P(iseq) && rb_iseq_check(iseq)) { int first_lineno = 0; if (iseq->body->line_info_table) { @@ -2152,7 +2156,7 @@ rb_method_entry_min_max_arity(const rb_method_entry_t *me, int *max) case VM_METHOD_TYPE_BMETHOD: return rb_proc_min_max_arity(def->body.proc, max); case VM_METHOD_TYPE_ISEQ: { - const rb_iseq_t *iseq = def->body.iseq.iseqptr; + const rb_iseq_t *iseq = rb_iseq_check(def->body.iseq.iseqptr); return rb_iseq_min_max_arity(iseq, max); } case VM_METHOD_TYPE_UNDEF: @@ -2289,7 +2293,7 @@ method_def_iseq(const rb_method_definition_t *def) { switch (def->type) { case VM_METHOD_TYPE_ISEQ: - return def->body.iseq.iseqptr; + return rb_iseq_check(def->body.iseq.iseqptr); case VM_METHOD_TYPE_BMETHOD: return get_proc_iseq(def->body.proc, 0); case VM_METHOD_TYPE_ALIAS: @@ -2654,6 +2658,7 @@ proc_binding(VALUE self) bind->env = envval; if (iseq) { + rb_iseq_check(iseq); bind->path = iseq->body->location.path; bind->first_lineno = FIX2INT(rb_iseq_first_lineno(iseq)); } diff --git a/sample/iseq_loader.rb b/sample/iseq_loader.rb 
new file mode 100644 index 0000000000..4fbf02b0f6 --- /dev/null +++ b/sample/iseq_loader.rb @@ -0,0 +1,240 @@ +# +# iseq_loader.rb - sample of compiler/loader for binary compiled file +# +# Usage as a compiler: ruby iseq_loader.rb [file or directory] ... +# +# It compiles and stores specified files. +# If directories are specified, then compiles and stores all *.rb files. +# (using Dir.glob) +# +# TODO: add remove option +# TODO: add verify option +# +# Usage as a loader: simply require this file with the following setting. +# +# Setting with environment variables. +# +# * RUBY_ISEQ_LOADER_STORAGE to select storage type +# * dbm: use dbm +# * fs: [default] use file system. locate a compiled binary files in same +# directory of scripts like Rubinius. foo.rb.yarb will be created for foo.rb. +# * fs2: use file system. locate compiled file in specified directory. +# * nothing: do nothing. +# +# * RUBY_ISEQ_LOADER_STORAGE_DIR to select directory +# * default: ~/.ruby_binaries/ +# +# * RUBY_ISEQ_LOADER_STORAGE_COMPILE_IF_NOT_COMPILED +# * true: store compiled file if compiled data is not available. +# * false: [default] do nothing if there is no compiled iseq data. + +class RubyVM::InstructionSequence + $ISEQ_LOADER_LOADED = 0 + $ISEQ_LOADER_COMPILED = 0 + $ISEQ_LOADER_IGNORED = 0 + LAUNCHED_TIME = Time.now + COMPILE_FILE_ENABLE = false || true + COMPILE_VERBOSE = $VERBOSE || false # || true + COMPILE_DEBUG = ENV['RUBY_ISEQ_LOADER_DEBUG'] + COMPILE_IF_NOT_COMPILED = ENV['RUBY_ISEQ_LOADER_STORAGE_COMPILE_IF_NOT_COMPILED'] == 'true' + + at_exit{ + STDERR.puts "[ISEQ_LOADER] #{Process.pid} time: #{Time.now - LAUNCHED_TIME}, " + + "loaded: #{$ISEQ_LOADER_LOADED}, " + + "compied: #{$ISEQ_LOADER_COMPILED}, " + + "ignored: #{$ISEQ_LOADER_IGNORED}" + } if COMPILE_VERBOSE + + unless cf_dir = ENV['RUBY_ISEQ_LOADER_STORAGE_DIR'] + cf_dir = File.expand_path("~/.ruby_binaries") + unless File.exist?(cf_dir) + Dir.mkdir(cf_dir) + end + end + CF_PREFIX = "#{cf_dir}/cb." 
+ + class NullStorage + def load_iseq fname; end + def compile_and_save_isq fname; end + def unlink_compiled_iseq; end + end + + class BasicStorage + def initialize + require 'digest/sha1' + end + + def load_iseq fname + iseq_key = iseq_key_name(fname) + if compiled_iseq_exist?(fname, iseq_key) && compiled_iseq_is_younger?(fname, iseq_key) + $ISEQ_LOADER_LOADED += 1 + STDERR.puts "[ISEQ_LOADER] #{Process.pid} load #{fname} from #{iseq_key}" if COMPILE_DEBUG + binary = read_compiled_iseq(fname, iseq_key) + RubyVM::InstructionSequence.from_binary_format(binary) + elsif COMPILE_IF_NOT_COMPILED + compile_and_save_iseq(fname, iseq_key) + else + $ISEQ_LOADER_IGNORED += 1 + # p fname + nil + end + end + + def extra_data fname + "SHA-1:#{::Digest::SHA1.file(fname).digest}" + end + + def compile_and_save_iseq fname, iseq_key = iseq_key_name(fname) + $ISEQ_LOADER_COMPILED += 1 + STDERR.puts "[RUBY_COMPILED_FILE] compile #{fname}" if COMPILE_DEBUG + iseq = RubyVM::InstructionSequence.compile_file(fname) + + binary = iseq.to_binary_format(extra_data(fname)) + write_compiled_iseq(fname, iseq_key, binary) + iseq + end + + # def unlink_compiled_iseq; nil; end # should implement at sub classes + + private + + def iseq_key_name fname + fname + end + + # should implement at sub classes + # def compiled_iseq_younger? fname, iseq_key; end + # def compiled_iseq_exist? fname, iseq_key; end + # def read_compiled_file fname, iseq_key; end + # def write_compiled_file fname, iseq_key, binary; end + end + + class FSStorage < BasicStorage + def initialize + super + require 'fileutils' + @dir = CF_PREFIX + "files" + unless File.directory?(@dir) + FileUtils.mkdir_p(@dir) + end + end + + def unlink_compiled_iseq + File.unlink(compile_file_path) + end + + private + + def iseq_key_name fname + "#{fname}.yarb" # same directory + end + + def compiled_iseq_exist? fname, iseq_key + File.exist?(iseq_key) + end + + def compiled_iseq_is_younger? 
fname, iseq_key + File.mtime(iseq_key) >= File.mtime(fname) + end + + def read_compiled_iseq fname, iseq_key + open(iseq_key, 'rb'){|f| f.read} + end + + def write_compiled_iseq fname, iseq_key, binary + open(iseq_key, 'wb'){|f| f.write(binary)} + end + end + + class FS2Storage < FSStorage + def iseq_key_name fname + @dir + fname.gsub(/[^A-Za-z0-9\._-]/){|c| '%02x' % c.ord} # special directory + end + end + + class DBMStorage < BasicStorage + def initialize + require 'dbm' + @db = DBM.open(CF_PREFIX+'db') + end + + def unlink_compiled_iseq + @db.delete fname + end + + private + + def date_key_name fname + "date.#{fname}" + end + + def iseq_key_name fname + "body.#{fname}" + end + + def compiled_iseq_exist? fname, iseq_key + @db.has_key? iseq_key + end + + def compiled_iseq_is_younger? fname, iseq_key + date_key = date_key_name(fname) + if @db.has_key? date_key + @db[date_key].to_i >= File.mtime(fname).to_i + end + end + + def read_compiled_iseq fname, iseq_key + @db[iseq_key] + end + + def write_compiled_iseq fname, iseq_key, binary + date_key = date_key_name(fname) + @db[iseq_key] = binary + @db[date_key] = Time.now.to_i + end + end + + STORAGE = case ENV['RUBY_ISEQ_LOADER_STORAGE'] + when 'dbm' + DBMStorage.new + when 'fs' + FSStorage.new + when 'fs2' + FS2Storage.new + when 'null' + NullStorage.new + else + FSStorage.new + end + + STDERR.puts "[ISEQ_LOADER] use #{STORAGE.class} " if COMPILE_VERBOSE + + def self.load_iseq fname + STORAGE.load_iseq(fname) + end + + def self.compile_and_save_iseq fname + STORAGE.compile_and_save_iseq fname + end + + def self.unlink_compiled_iseq fname + STORAGE.unlink_compiled_iseq fname + end +end + +if __FILE__ == $0 + ARGV.each{|path| + if File.directory?(path) + pattern = File.join(path, '**/*.rb') + Dir.glob(pattern){|file| + begin + RubyVM::InstructionSequence.compile_and_save_iseq(file) + rescue SyntaxError => e + STDERR.puts e + end + } + else + RubyVM::InstructionSequence.compile_and_save_iseq(path) + end + } +end diff 
--git a/test/lib/iseq_loader_checker.rb b/test/lib/iseq_loader_checker.rb index 0c372ca638..09df3d38be 100644 --- a/test/lib/iseq_loader_checker.rb +++ b/test/lib/iseq_loader_checker.rb @@ -1,5 +1,8 @@ -require '-test-/iseq_load/iseq_load' +begin + require '-test-/iseq_load/iseq_load' +rescue LoadError +end require 'tempfile' class RubyVM::InstructionSequence @@ -21,9 +24,6 @@ class RubyVM::InstructionSequence d2 = i2.disasm_if_possible if d1 != d2 - p i1 - return - STDERR.puts "expected:" STDERR.puts d1 STDERR.puts "actual:" @@ -37,19 +37,38 @@ class RubyVM::InstructionSequence i2 end + CHECK_TO_A = ENV['RUBY_ISEQ_DUMP_DEBUG'] == 'to_a' + CHECK_TO_BINARY = ENV['RUBY_ISEQ_DUMP_DEBUG'] == 'to_binary' + def self.translate i1 # check to_a/load_iseq - i2 = compare_dump_and_load(i1, - proc{|iseq| - ary = iseq.to_a - ary[9] == :top ? ary : nil - }, - proc{|ary| - RubyVM::InstructionSequence.iseq_load(ary) - }) + i2_ary = compare_dump_and_load(i1, + proc{|iseq| + ary = iseq.to_a + ary[9] == :top ? 
ary : nil + }, + proc{|ary| + RubyVM::InstructionSequence.iseq_load(ary) + }) if CHECK_TO_A && defined?(RubyVM::InstructionSequence.iseq_load) + + # check to_binary_format + i2_bin = compare_dump_and_load(i1, + proc{|iseq| + begin + iseq.to_binary_format + rescue RuntimeError => e # not a toplevel + # STDERR.puts [:failed, e, iseq].inspect + nil + end + }, + proc{|bin| + iseq = RubyVM::InstructionSequence.from_binary_format(bin) + # STDERR.puts iseq.inspect + iseq + }) if CHECK_TO_BINARY # return value - i1 - end + i2_bin if CHECK_TO_BINARY + end if CHECK_TO_A || CHECK_TO_BINARY end #require_relative 'x'; exit(1) diff --git a/test/runner.rb b/test/runner.rb index c3cb2d8472..13506e592e 100644 --- a/test/runner.rb +++ b/test/runner.rb @@ -22,7 +22,7 @@ ENV["GEM_SKIP"] = ENV["GEM_HOME"] = ENV["GEM_PATH"] = "".freeze require_relative 'lib/profile_test_all' if ENV.has_key?('RUBY_TEST_ALL_PROFILE') require_relative 'lib/tracepointchecker' require_relative 'lib/zombie_hunter' -# require_relative 'lib/iseq_loader_checker' +require_relative 'lib/iseq_loader_checker' if ENV['COVERAGE'] %w[doclie simplecov-html simplecov].each do |f| @@ -945,7 +945,7 @@ invoke_block_from_c_0(rb_thread_t *th, const rb_block_t *block, return Qnil; } else if (LIKELY(RUBY_VM_NORMAL_ISEQ_P(block->iseq))) { - const rb_iseq_t *iseq = block->iseq; + const rb_iseq_t *iseq = rb_iseq_check(block->iseq); int i, opt_pc; int type = block_proc_is_lambda(block->proc) ? 
VM_FRAME_MAGIC_LAMBDA : VM_FRAME_MAGIC_BLOCK; VALUE *sp = th->cfp->sp; @@ -1816,6 +1816,7 @@ vm_exec(rb_thread_t *th) if (catch_iseq != NULL) { /* found catch table */ /* enter catch scope */ + rb_iseq_check(catch_iseq); cfp->sp = vm_base_ptr(cfp) + cont_sp; cfp->pc = cfp->iseq->body->iseq_encoded + cont_pc; @@ -257,10 +257,10 @@ struct rb_call_cache { #endif typedef struct rb_iseq_location_struct { - const VALUE path; - const VALUE absolute_path; - const VALUE base_label; - const VALUE label; + VALUE path; + VALUE absolute_path; + VALUE base_label; + VALUE label; VALUE first_lineno; /* TODO: may be unsigned short */ } rb_iseq_location_t; @@ -376,7 +376,7 @@ struct rb_iseq_constant_body { */ struct rb_call_cache *cc_entries; /* size is ci_size = ci_kw_size */ - const VALUE mark_ary; /* Array: includes operands which should be GC marked */ + VALUE mark_ary; /* Array: includes operands which should be GC marked */ unsigned int local_table_size; unsigned int is_size; @@ -389,12 +389,40 @@ struct rb_iseq_constant_body { /* typedef rb_iseq_t is in method.h */ struct rb_iseq_struct { VALUE flags; - struct iseq_compile_data *compile_data_; /* used at compile time */ - struct rb_iseq_constant_body *body; VALUE reserved1; - VALUE reserved2; + struct rb_iseq_constant_body *body; + + union { /* 4, 5 words */ + struct iseq_compile_data *compile_data; /* used at compile time */ + + struct { + VALUE obj; + int index; + } loader; + } aux; }; +#define USE_LAZY_LOAD 0 + +#ifndef USE_LAZY_LOAD +#define USE_LAZY_LOAD +#endif + +#if USE_LAZY_LOAD +const rb_iseq_t *rb_iseq_complete(const rb_iseq_t *iseq); + +static inline const rb_iseq_t * +rb_iseq_check(const rb_iseq_t *iseq) +{ + if (iseq->body == NULL) { + rb_iseq_complete((rb_iseq_t *)iseq); + } + return iseq; +} +#else +#define rb_iseq_check(iseq) iseq +#endif + enum ruby_special_exceptions { ruby_error_reenter, ruby_error_nomemory, @@ -962,7 +990,7 @@ rb_block_t *rb_vm_control_frame_block_ptr(const rb_control_frame_t *cfp); 
(!RUBY_VM_VALID_CONTROL_FRAME_P((cfp), RUBY_VM_END_CONTROL_FRAME(th))) #define RUBY_VM_IFUNC_P(ptr) (RB_TYPE_P((VALUE)(ptr), T_IMEMO) && imemo_type((VALUE)ptr) == imemo_ifunc) -#define RUBY_VM_NORMAL_ISEQ_P(ptr) (RB_TYPE_P((VALUE)(ptr), T_IMEMO) && imemo_type((VALUE)ptr) == imemo_iseq) +#define RUBY_VM_NORMAL_ISEQ_P(ptr) (RB_TYPE_P((VALUE)(ptr), T_IMEMO) && imemo_type((VALUE)ptr) == imemo_iseq && rb_iseq_check((rb_iseq_t *)ptr)) #define RUBY_VM_GET_BLOCK_PTR_IN_CFP(cfp) ((rb_block_t *)(&(cfp)->self)) #define RUBY_VM_GET_CFP_FROM_BLOCK_PTR(b) \ diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 033edea5fd..cfa76ab6c9 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -1400,7 +1400,7 @@ def_iseq_ptr(rb_method_definition_t *def) #if VM_CHECK_MODE > 0 if (def->type != VM_METHOD_TYPE_ISEQ) rb_bug("def_iseq_ptr: not iseq (%d)", def->type); #endif - return def->body.iseq.iseqptr; + return rb_iseq_check(def->body.iseq.iseqptr); } static VALUE @@ -2428,15 +2428,14 @@ static VALUE vm_invoke_block(rb_thread_t *th, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling, const struct rb_call_info *ci) { const rb_block_t *block = VM_CF_BLOCK_PTR(reg_cfp); - const rb_iseq_t *iseq; VALUE type = GET_ISEQ()->body->local_iseq->body->type; if ((type != ISEQ_TYPE_METHOD && type != ISEQ_TYPE_CLASS) || block == 0) { rb_vm_localjump_error("no block given (yield)", Qnil, 0); } - iseq = block->iseq; - if (RUBY_VM_NORMAL_ISEQ_P(iseq)) { + if (RUBY_VM_NORMAL_ISEQ_P(block->iseq)) { + const rb_iseq_t *iseq = block->iseq; const int arg_size = iseq->body->param.size; int is_lambda = block_proc_is_lambda(block->proc); VALUE * const rsp = GET_SP() - calling->argc; |