diff options
Diffstat (limited to 'iseq.c')
-rw-r--r-- | iseq.c | 2849 |
1 files changed, 1660 insertions, 1189 deletions
@@ -19,7 +19,6 @@ #endif #include "eval_intern.h" -#include "gc.h" #include "id_table.h" #include "internal.h" #include "internal/bits.h" @@ -27,14 +26,16 @@ #include "internal/compile.h" #include "internal/error.h" #include "internal/file.h" +#include "internal/gc.h" #include "internal/hash.h" -#include "internal/parse.h" +#include "internal/io.h" +#include "internal/ruby_parser.h" #include "internal/sanitizers.h" #include "internal/symbol.h" #include "internal/thread.h" #include "internal/variable.h" #include "iseq.h" -#include "mjit.h" +#include "rjit.h" #include "ruby/util.h" #include "vm_core.h" #include "vm_callinfo.h" @@ -60,19 +61,19 @@ static inline VALUE obj_resurrect(VALUE obj) { if (hidden_obj_p(obj)) { - switch (BUILTIN_TYPE(obj)) { - case T_STRING: - obj = rb_str_resurrect(obj); - break; - case T_ARRAY: - obj = rb_ary_resurrect(obj); - break; + switch (BUILTIN_TYPE(obj)) { + case T_STRING: + obj = rb_str_resurrect(obj); + break; + case T_ARRAY: + obj = rb_ary_resurrect(obj); + break; case T_HASH: obj = rb_hash_resurrect(obj); break; default: - break; - } + break; + } } return obj; } @@ -93,12 +94,66 @@ static void compile_data_free(struct iseq_compile_data *compile_data) { if (compile_data) { - free_arena(compile_data->node.storage_head); - free_arena(compile_data->insn.storage_head); - if (compile_data->ivar_cache_table) { - rb_id_table_free(compile_data->ivar_cache_table); - } - ruby_xfree(compile_data); + free_arena(compile_data->node.storage_head); + free_arena(compile_data->insn.storage_head); + if (compile_data->ivar_cache_table) { + rb_id_table_free(compile_data->ivar_cache_table); + } + ruby_xfree(compile_data); + } +} + +static void +remove_from_constant_cache(ID id, IC ic) +{ + rb_vm_t *vm = GET_VM(); + VALUE lookup_result; + st_data_t ic_data = (st_data_t)ic; + + if (rb_id_table_lookup(vm->constant_cache, id, &lookup_result)) { + st_table *ics = (st_table *)lookup_result; + st_delete(ics, &ic_data, NULL); + + if (ics->num_entries == 0) { + rb_id_table_delete(vm->constant_cache, id); + st_free_table(ics); + } + } +} + +// When an ISEQ is being freed, all of its associated ICs are going to go away +// as well. Because of this, we need to iterate over the ICs, and clear them +// from the VM's constant cache. +static void +iseq_clear_ic_references(const rb_iseq_t *iseq) +{ + // In some cases (when there is a compilation error), we end up with + // ic_size greater than 0, but no allocated is_entries buffer. + // If there's no is_entries buffer to loop through, return early. + // [Bug #19173] + if (!ISEQ_BODY(iseq)->is_entries) { + return; + } + + for (unsigned int ic_idx = 0; ic_idx < ISEQ_BODY(iseq)->ic_size; ic_idx++) { + IC ic = &ISEQ_IS_IC_ENTRY(ISEQ_BODY(iseq), ic_idx); + + // Iterate over the IC's constant path's segments and clean any references to + // the ICs out of the VM's constant cache table. + const ID *segments = ic->segments; + + // It's possible that segments is NULL if we overallocated an IC but + // optimizations removed the instruction using it + if (segments == NULL) + continue; + + for (int i = 0; segments[i]; i++) { + ID id = segments[i]; + if (id == idNULL) continue; + remove_from_constant_cache(id, ic); + } + + ruby_xfree((void *)segments); } } @@ -107,33 +162,44 @@ rb_iseq_free(const rb_iseq_t *iseq) { RUBY_FREE_ENTER("iseq"); - if (iseq && iseq->body) { - struct rb_iseq_constant_body *const body = iseq->body; - mjit_free_iseq(iseq); /* Notify MJIT */ - rb_yjit_iseq_free(body); - ruby_xfree((void *)body->iseq_encoded); - ruby_xfree((void *)body->insns_info.body); - if (body->insns_info.positions) ruby_xfree((void *)body->insns_info.positions); + if (iseq && ISEQ_BODY(iseq)) { + iseq_clear_ic_references(iseq); + struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); + rb_rjit_free_iseq(iseq); /* Notify RJIT */ +#if USE_YJIT + rb_yjit_iseq_free(iseq); + if (FL_TEST_RAW((VALUE)iseq, ISEQ_TRANSLATED)) { + RUBY_ASSERT(rb_yjit_live_iseq_count > 0); + rb_yjit_live_iseq_count--; + } +#endif + ruby_xfree((void *)body->iseq_encoded); + ruby_xfree((void *)body->insns_info.body); + ruby_xfree((void *)body->insns_info.positions); #if VM_INSN_INFO_TABLE_IMPL == 2 - if (body->insns_info.succ_index_table) ruby_xfree(body->insns_info.succ_index_table); + ruby_xfree(body->insns_info.succ_index_table); #endif if (LIKELY(body->local_table != rb_iseq_shared_exc_local_tbl)) ruby_xfree((void *)body->local_table); - ruby_xfree((void *)body->is_entries); + ruby_xfree((void *)body->is_entries); + ruby_xfree(body->call_data); + ruby_xfree((void *)body->catch_table); + ruby_xfree((void *)body->param.opt_table); + if (ISEQ_MBITS_BUFLEN(body->iseq_size) > 1 && body->mark_bits.list) { + ruby_xfree((void *)body->mark_bits.list); + } - if (body->call_data) { - ruby_xfree(body->call_data); - } - ruby_xfree((void *)body->catch_table); - ruby_xfree((void *)body->param.opt_table); - - if (body->param.keyword != NULL) { - ruby_xfree((void *)body->param.keyword->default_values); - ruby_xfree((void *)body->param.keyword); - } - compile_data_free(ISEQ_COMPILE_DATA(iseq)); + ruby_xfree(body->variable.original_iseq); + + if (body->param.keyword != NULL) { + if (body->param.keyword->table != &body->local_table[body->param.keyword->bits_start - body->param.keyword->num]) + ruby_xfree((void *)body->param.keyword->table); + ruby_xfree((void *)body->param.keyword->default_values); + ruby_xfree((void *)body->param.keyword); + } + compile_data_free(ISEQ_COMPILE_DATA(iseq)); if (body->outer_variables) rb_id_table_free(body->outer_variables); - ruby_xfree(body); + ruby_xfree(body); } if (iseq && ISEQ_EXECUTABLE_P(iseq) && iseq->aux.exec.local_hooks) { @@ -143,296 +209,211 @@ rb_iseq_free(const rb_iseq_t *iseq) RUBY_FREE_LEAVE("iseq"); } -#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE -static VALUE -rb_vm_insn_addr2insn2(const void *addr) -{ - return (VALUE)rb_vm_insn_addr2insn(addr); -} -#endif - -static VALUE -rb_vm_insn_null_translator(const void *addr) -{ - return (VALUE)addr; -} - typedef VALUE iseq_value_itr_t(void *ctx, VALUE obj); -typedef VALUE rb_vm_insns_translator_t(const void *addr); -static int -iseq_extract_values(VALUE *code, size_t pos, iseq_value_itr_t * func, void *data, rb_vm_insns_translator_t * translator) +static inline void +iseq_scan_bits(unsigned int page, iseq_bits_t bits, VALUE *code, VALUE *original_iseq) { - VALUE insn = translator((void *)code[pos]); - int len = insn_len(insn); - int op_no; - const char *types = insn_op_types(insn); + unsigned int offset; + unsigned int page_offset = (page * ISEQ_MBITS_BITLENGTH); - for (op_no = 0; types[op_no]; op_no++) { - char type = types[op_no]; - switch (type) { - case TS_CDHASH: - case TS_ISEQ: - case TS_VALUE: - { - VALUE op = code[pos + op_no + 1]; - if (!SPECIAL_CONST_P(op)) { - VALUE newop = func(data, op); - if (newop != op) { - code[pos + op_no + 1] = newop; - } - } - } - break; - case TS_IC: - { - IC ic = (IC)code[pos + op_no + 1]; - if (ic->entry) { - VALUE nv = func(data, (VALUE)ic->entry); - if ((VALUE)ic->entry != nv) { - ic->entry = (void *)nv; - } - } - } - break; - case TS_IVC: - { - IVC ivc = (IVC)code[pos + op_no + 1]; - if (ivc->entry) { - if (RB_TYPE_P(ivc->entry->class_value, T_NONE)) { - rb_bug("!! %u", ivc->entry->index); - } - VALUE nv = func(data, ivc->entry->class_value); - if (ivc->entry->class_value != nv) { - ivc->entry->class_value = nv; - } - } - } - break; - case TS_ISE: - { - union iseq_inline_storage_entry *const is = (union iseq_inline_storage_entry *)code[pos + op_no + 1]; - if (is->once.value) { - VALUE nv = func(data, is->once.value); - if (is->once.value != nv) { - is->once.value = nv; - } - } - } - break; - default: - break; - } + while (bits) { + offset = ntz_intptr(bits); + VALUE op = code[page_offset + offset]; + rb_gc_mark_and_move(&code[page_offset + offset]); + VALUE newop = code[page_offset + offset]; + if (original_iseq && newop != op) { + original_iseq[page_offset + offset] = newop; + } + bits &= bits - 1; // Reset Lowest Set Bit (BLSR) } - - return len; } static void -rb_iseq_each_value(const rb_iseq_t *iseq, iseq_value_itr_t * func, void *data) +rb_iseq_mark_and_move_each_value(const rb_iseq_t *iseq, VALUE *original_iseq) { unsigned int size; VALUE *code; - size_t n; - rb_vm_insns_translator_t *const translator = -#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE - (FL_TEST((VALUE)iseq, ISEQ_TRANSLATED)) ? rb_vm_insn_addr2insn2 : -#endif - rb_vm_insn_null_translator; - const struct rb_iseq_constant_body *const body = iseq->body; + const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); size = body->iseq_size; code = body->iseq_encoded; - for (n = 0; n < size;) { - n += iseq_extract_values(code, n, func, data, translator); - } -} + union iseq_inline_storage_entry *is_entries = body->is_entries; -static VALUE -update_each_insn_value(void *ctx, VALUE obj) -{ - return rb_gc_location(obj); -} + if (body->is_entries) { + // Skip iterating over ivc caches + is_entries += body->ivc_size; -void -rb_iseq_update_references(rb_iseq_t *iseq) -{ - if (iseq->body) { - struct rb_iseq_constant_body *body = iseq->body; + // ICVARC entries + for (unsigned int i = 0; i < body->icvarc_size; i++, is_entries++) { + ICVARC icvarc = (ICVARC)is_entries; + if (icvarc->entry) { + RUBY_ASSERT(!RB_TYPE_P(icvarc->entry->class_value, T_NONE)); - body->variable.coverage = rb_gc_location(body->variable.coverage); - body->variable.pc2branchindex = rb_gc_location(body->variable.pc2branchindex); - body->location.label = rb_gc_location(body->location.label); - body->location.base_label = rb_gc_location(body->location.base_label); - body->location.pathobj = rb_gc_location(body->location.pathobj); - if (body->local_iseq) { - body->local_iseq = (struct rb_iseq_struct *)rb_gc_location((VALUE)body->local_iseq); - } - if (body->parent_iseq) { - body->parent_iseq = (struct rb_iseq_struct *)rb_gc_location((VALUE)body->parent_iseq); - } - if (body->mandatory_only_iseq) { - body->mandatory_only_iseq = (struct rb_iseq_struct *)rb_gc_location((VALUE)body->mandatory_only_iseq); - } - if (body->call_data) { - for (unsigned int i=0; i<body->ci_size; i++) { - struct rb_call_data *cds = body->call_data; - if (!SPECIAL_CONST_P((VALUE)cds[i].ci)) { - cds[i].ci = (struct rb_callinfo *)rb_gc_location((VALUE)cds[i].ci); - } - cds[i].cc = (struct rb_callcache *)rb_gc_location((VALUE)cds[i].cc); + rb_gc_mark_and_move(&icvarc->entry->class_value); } } - if (FL_TEST((VALUE)iseq, ISEQ_MARKABLE_ISEQ)) { - rb_iseq_each_value(iseq, update_each_insn_value, NULL); - VALUE *original_iseq = ISEQ_ORIGINAL_ISEQ(iseq); - if (original_iseq) { - size_t n = 0; - const unsigned int size = body->iseq_size; - while (n < size) { - n += iseq_extract_values(original_iseq, n, update_each_insn_value, NULL, rb_vm_insn_null_translator); - } + + // ISE entries + for (unsigned int i = 0; i < body->ise_size; i++, is_entries++) { + union iseq_inline_storage_entry *const is = (union iseq_inline_storage_entry *)is_entries; + if (is->once.value) { + rb_gc_mark_and_move(&is->once.value); } } - if (body->param.flags.has_kw && ISEQ_COMPILE_DATA(iseq) == NULL) { - int i, j; - - i = body->param.keyword->required_num; - - for (j = 0; i < body->param.keyword->num; i++, j++) { - VALUE obj = body->param.keyword->default_values[j]; - if (obj != Qundef) { - body->param.keyword->default_values[j] = rb_gc_location(obj); - } + // IC Entries + for (unsigned int i = 0; i < body->ic_size; i++, is_entries++) { + IC ic = (IC)is_entries; + if (ic->entry) { + rb_gc_mark_and_move_ptr(&ic->entry); } } + } - if (body->catch_table) { - struct iseq_catch_table *table = body->catch_table; - unsigned int i; - for (i = 0; i < table->size; i++) { - struct iseq_catch_table_entry *entry; - entry = UNALIGNED_MEMBER_PTR(table, entries[i]); - if (entry->iseq) { - entry->iseq = (rb_iseq_t *)rb_gc_location((VALUE)entry->iseq); + // Embedded VALUEs + if (body->mark_bits.list) { + if (ISEQ_MBITS_BUFLEN(size) == 1) { + iseq_scan_bits(0, body->mark_bits.single, code, original_iseq); + } + else { + if (body->mark_bits.list) { + for (unsigned int i = 0; i < ISEQ_MBITS_BUFLEN(size); i++) { + iseq_bits_t bits = body->mark_bits.list[i]; + iseq_scan_bits(i, bits, code, original_iseq); } } } -#if USE_MJIT - mjit_update_references(iseq); -#endif - rb_yjit_iseq_update_references(body); } } -static VALUE -each_insn_value(void *ctx, VALUE obj) +static bool +cc_is_active(const struct rb_callcache *cc, bool reference_updating) { - rb_gc_mark_movable(obj); - return obj; + if (cc) { + if (cc == rb_vm_empty_cc() || rb_vm_empty_cc_for_super()) { + return false; + } + + if (reference_updating) { + cc = (const struct rb_callcache *)rb_gc_location((VALUE)cc); + } + + if (vm_cc_markable(cc)) { + if (cc->klass) { // cc is not invalidated + const struct rb_callable_method_entry_struct *cme = vm_cc_cme(cc); + if (reference_updating) { + cme = (const struct rb_callable_method_entry_struct *)rb_gc_location((VALUE)cme); + } + if (!METHOD_ENTRY_INVALIDATED(cme)) { + return true; + } + } + } + } + return false; } void -rb_iseq_mark(const rb_iseq_t *iseq) +rb_iseq_mark_and_move(rb_iseq_t *iseq, bool reference_updating) { RUBY_MARK_ENTER("iseq"); - RUBY_MARK_UNLESS_NULL(iseq->wrapper); + rb_gc_mark_and_move(&iseq->wrapper); - if (iseq->body) { - const struct rb_iseq_constant_body *const body = iseq->body; + if (ISEQ_BODY(iseq)) { + struct rb_iseq_constant_body *body = ISEQ_BODY(iseq); - if (FL_TEST((VALUE)iseq, ISEQ_MARKABLE_ISEQ)) { - rb_iseq_each_value(iseq, each_insn_value, NULL); - } + rb_iseq_mark_and_move_each_value(iseq, reference_updating ? ISEQ_ORIGINAL_ISEQ(iseq) : NULL); - rb_gc_mark_movable(body->variable.coverage); - rb_gc_mark_movable(body->variable.pc2branchindex); - rb_gc_mark_movable(body->variable.script_lines); - rb_gc_mark_movable(body->location.label); - rb_gc_mark_movable(body->location.base_label); - rb_gc_mark_movable(body->location.pathobj); - RUBY_MARK_MOVABLE_UNLESS_NULL((VALUE)body->mandatory_only_iseq); - RUBY_MARK_MOVABLE_UNLESS_NULL((VALUE)body->parent_iseq); + rb_gc_mark_and_move(&body->variable.coverage); + rb_gc_mark_and_move(&body->variable.pc2branchindex); + rb_gc_mark_and_move(&body->variable.script_lines); + rb_gc_mark_and_move(&body->location.label); + rb_gc_mark_and_move(&body->location.base_label); + rb_gc_mark_and_move(&body->location.pathobj); + if (body->local_iseq) rb_gc_mark_and_move_ptr(&body->local_iseq); + if (body->parent_iseq) rb_gc_mark_and_move_ptr(&body->parent_iseq); + if (body->mandatory_only_iseq) rb_gc_mark_and_move_ptr(&body->mandatory_only_iseq); if (body->call_data) { - struct rb_call_data *cds = (struct rb_call_data *)body->call_data; - for (unsigned int i=0; i<body->ci_size; i++) { - const struct rb_callinfo *ci = cds[i].ci; - const struct rb_callcache *cc = cds[i].cc; + for (unsigned int i = 0; i < body->ci_size; i++) { + struct rb_call_data *cds = body->call_data; - if (vm_ci_markable(ci)) { - rb_gc_mark_movable((VALUE)ci); + if (cds[i].ci) rb_gc_mark_and_move_ptr(&cds[i].ci); + + if (cc_is_active(cds[i].cc, reference_updating)) { + rb_gc_mark_and_move_ptr(&cds[i].cc); + } + else { + cds[i].cc = rb_vm_empty_cc(); } + } + } - if (cc) { - VM_ASSERT((cc->flags & VM_CALLCACHE_ON_STACK) == 0); + if (body->param.flags.has_kw && ISEQ_COMPILE_DATA(iseq) == NULL) { + const struct rb_iseq_param_keyword *const keyword = body->param.keyword; - if (vm_cc_markable(cc)) { - if (!vm_cc_invalidated_p(cc)) { - rb_gc_mark_movable((VALUE)cc); - } - else { - cds[i].cc = rb_vm_empty_cc(); - } - } + for (int j = 0, i = keyword->required_num; i < keyword->num; i++, j++) { + rb_gc_mark_and_move(&keyword->default_values[j]); + } + } + + if (body->catch_table) { + struct iseq_catch_table *table = body->catch_table; + + for (unsigned int i = 0; i < table->size; i++) { + struct iseq_catch_table_entry *entry; + entry = UNALIGNED_MEMBER_PTR(table, entries[i]); + if (entry->iseq) { + rb_gc_mark_and_move_ptr(&entry->iseq); } } } - if (body->param.flags.has_kw && ISEQ_COMPILE_DATA(iseq) == NULL) { - const struct rb_iseq_param_keyword *const keyword = body->param.keyword; - int i, j; - - i = keyword->required_num; - - for (j = 0; i < keyword->num; i++, j++) { - VALUE obj = keyword->default_values[j]; - if (!SPECIAL_CONST_P(obj)) { - rb_gc_mark_movable(obj); - } - } - } - - if (body->catch_table) { - const struct iseq_catch_table *table = body->catch_table; - unsigned int i; - for (i = 0; i < table->size; i++) { - const struct iseq_catch_table_entry *entry; - entry = UNALIGNED_MEMBER_PTR(table, entries[i]); - if (entry->iseq) { - rb_gc_mark_movable((VALUE)entry->iseq); - } - } - } - -#if USE_MJIT - mjit_mark_cc_entries(body); + if (reference_updating) { +#if USE_RJIT + rb_rjit_iseq_update_references(body); +#endif +#if USE_YJIT + rb_yjit_iseq_update_references(iseq); +#endif + } + else { +#if USE_RJIT + rb_rjit_iseq_mark(body->rjit_blocks); +#endif +#if USE_YJIT + rb_yjit_iseq_mark(body->yjit_payload); #endif - rb_yjit_iseq_mark(body); + } } if (FL_TEST_RAW((VALUE)iseq, ISEQ_NOT_LOADED_YET)) { - rb_gc_mark(iseq->aux.loader.obj); + rb_gc_mark_and_move(&iseq->aux.loader.obj); } else if (FL_TEST_RAW((VALUE)iseq, ISEQ_USE_COMPILE_DATA)) { - const struct iseq_compile_data *const compile_data = ISEQ_COMPILE_DATA(iseq); - - rb_iseq_mark_insn_storage(compile_data->insn.storage_head); - - RUBY_MARK_UNLESS_NULL(compile_data->err_info); - if (RTEST(compile_data->catch_table_ary)) { - rb_gc_mark(compile_data->catch_table_ary); + const struct iseq_compile_data *const compile_data = ISEQ_COMPILE_DATA(iseq); + + if (!reference_updating) { + /* The operands in each instruction needs to be pinned because + * if auto-compaction runs in iseq_set_sequence, then the objects + * could exist on the generated_iseq buffer, which would not be + * reference updated which can lead to T_MOVED (and subsequently + * T_NONE) objects on the iseq. */ + rb_iseq_mark_and_pin_insn_storage(compile_data->insn.storage_head); } - VM_ASSERT(compile_data != NULL); + + rb_gc_mark_and_move((VALUE *)&compile_data->err_info); + rb_gc_mark_and_move((VALUE *)&compile_data->catch_table_ary); } else { /* executable */ VM_ASSERT(ISEQ_EXECUTABLE_P(iseq)); + if (iseq->aux.exec.local_hooks) { - rb_hook_list_mark(iseq->aux.exec.local_hooks); + rb_hook_list_mark_and_update(iseq->aux.exec.local_hooks); } } @@ -456,7 +437,7 @@ size_t rb_iseq_memsize(const rb_iseq_t *iseq) { size_t size = 0; /* struct already counted as RVALUE size */ - const struct rb_iseq_constant_body *body = iseq->body; + const struct rb_iseq_constant_body *body = ISEQ_BODY(iseq); const struct iseq_compile_data *compile_data; /* TODO: should we count original_iseq? */ @@ -466,6 +447,7 @@ rb_iseq_memsize(const rb_iseq_t *iseq) size += body->iseq_size * sizeof(VALUE); size += body->insns_info.size * (sizeof(struct iseq_insn_info_entry) + sizeof(unsigned int)); size += body->local_table_size * sizeof(ID); + size += ISEQ_MBITS_BUFLEN(body->iseq_size) * ISEQ_MBITS_SIZE; if (body->catch_table) { size += iseq_catch_table_bytes(body->catch_table->size); } @@ -473,7 +455,20 @@ rb_iseq_memsize(const rb_iseq_t *iseq) size += param_keyword_size(body->param.keyword); /* body->is_entries */ - size += body->is_size * sizeof(union iseq_inline_storage_entry); + size += ISEQ_IS_SIZE(body) * sizeof(union iseq_inline_storage_entry); + + if (ISEQ_BODY(iseq)->is_entries) { + /* IC entries constant segments */ + for (unsigned int ic_idx = 0; ic_idx < body->ic_size; ic_idx++) { + IC ic = &ISEQ_IS_IC_ENTRY(body, ic_idx); + const ID *ids = ic->segments; + if (!ids) continue; + while (*ids++) { + size += sizeof(ID); + } + size += sizeof(ID); // null terminator + } + } /* body->call_data */ size += body->ci_size * sizeof(struct rb_call_data); @@ -482,15 +477,15 @@ rb_iseq_memsize(const rb_iseq_t *iseq) compile_data = ISEQ_COMPILE_DATA(iseq); if (compile_data) { - struct iseq_compile_data_storage *cur; + struct iseq_compile_data_storage *cur; - size += sizeof(struct iseq_compile_data); + size += sizeof(struct iseq_compile_data); - cur = compile_data->node.storage_head; - while (cur) { - size += cur->size + offsetof(struct iseq_compile_data_storage, buff); - cur = cur->next; - } + cur = compile_data->node.storage_head; + while (cur) { + size += cur->size + offsetof(struct iseq_compile_data_storage, buff); + cur = cur->next; + } } return size; @@ -508,7 +503,7 @@ static rb_iseq_t * iseq_alloc(void) { rb_iseq_t *iseq = iseq_imemo_alloc(); - iseq->body = rb_iseq_constant_body_alloc(); + ISEQ_BODY(iseq) = rb_iseq_constant_body_alloc(); return iseq; } @@ -520,13 +515,13 @@ rb_iseq_pathobj_new(VALUE path, VALUE realpath) VM_ASSERT(NIL_P(realpath) || RB_TYPE_P(realpath, T_STRING)); if (path == realpath || - (!NIL_P(realpath) && rb_str_cmp(path, realpath) == 0)) { - pathobj = rb_fstring(path); + (!NIL_P(realpath) && rb_str_cmp(path, realpath) == 0)) { + pathobj = rb_fstring(path); } else { - if (!NIL_P(realpath)) realpath = rb_fstring(realpath); - pathobj = rb_ary_new_from_args(2, rb_fstring(path), realpath); - rb_obj_freeze(pathobj); + if (!NIL_P(realpath)) realpath = rb_fstring(realpath); + pathobj = rb_ary_new_from_args(2, rb_fstring(path), realpath); + rb_obj_freeze(pathobj); } return pathobj; } @@ -534,28 +529,33 @@ rb_iseq_pathobj_new(VALUE path, VALUE realpath) void rb_iseq_pathobj_set(const rb_iseq_t *iseq, VALUE path, VALUE realpath) { - RB_OBJ_WRITE(iseq, &iseq->body->location.pathobj, - rb_iseq_pathobj_new(path, realpath)); + RB_OBJ_WRITE(iseq, &ISEQ_BODY(iseq)->location.pathobj, + rb_iseq_pathobj_new(path, realpath)); } static rb_iseq_location_t * -iseq_location_setup(rb_iseq_t *iseq, VALUE name, VALUE path, VALUE realpath, VALUE first_lineno, const rb_code_location_t *code_location, const int node_id) +iseq_location_setup(rb_iseq_t *iseq, VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_code_location_t *code_location, const int node_id) { - rb_iseq_location_t *loc = &iseq->body->location; + rb_iseq_location_t *loc = &ISEQ_BODY(iseq)->location; rb_iseq_pathobj_set(iseq, path, realpath); RB_OBJ_WRITE(iseq, &loc->label, name); RB_OBJ_WRITE(iseq, &loc->base_label, name); loc->first_lineno = first_lineno; + + if (ISEQ_BODY(iseq)->local_iseq == iseq && strcmp(RSTRING_PTR(name), "initialize") == 0) { + ISEQ_BODY(iseq)->param.flags.use_block = 1; + } + if (code_location) { loc->node_id = node_id; - loc->code_location = *code_location; + loc->code_location = *code_location; } else { - loc->code_location.beg_pos.lineno = 0; - loc->code_location.beg_pos.column = 0; - loc->code_location.end_pos.lineno = -1; - loc->code_location.end_pos.column = -1; + loc->code_location.beg_pos.lineno = 0; + loc->code_location.beg_pos.column = 0; + loc->code_location.end_pos.lineno = -1; + loc->code_location.end_pos.column = -1; } return loc; @@ -564,26 +564,26 @@ iseq_location_setup(rb_iseq_t *iseq, VALUE name, VALUE path, VALUE realpath, VAL static void set_relation(rb_iseq_t *iseq, const rb_iseq_t *piseq) { - struct rb_iseq_constant_body *const body = iseq->body; + struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); const VALUE type = body->type; /* set class nest stack */ if (type == ISEQ_TYPE_TOP) { - body->local_iseq = iseq; + body->local_iseq = iseq; } else if (type == ISEQ_TYPE_METHOD || type == ISEQ_TYPE_CLASS) { - body->local_iseq = iseq; + body->local_iseq = iseq; } else if (piseq) { - body->local_iseq = piseq->body->local_iseq; + body->local_iseq = ISEQ_BODY(piseq)->local_iseq; } if (piseq) { - body->parent_iseq = piseq; + body->parent_iseq = piseq; } if (type == ISEQ_TYPE_MAIN) { - body->local_iseq = iseq; + body->local_iseq = iseq; } } @@ -604,16 +604,16 @@ new_arena(void) static VALUE prepare_iseq_build(rb_iseq_t *iseq, - VALUE name, VALUE path, VALUE realpath, VALUE first_lineno, const rb_code_location_t *code_location, const int node_id, - const rb_iseq_t *parent, int isolated_depth, enum iseq_type type, + VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_code_location_t *code_location, const int node_id, + const rb_iseq_t *parent, int isolated_depth, enum rb_iseq_type type, VALUE script_lines, const rb_compile_option_t *option) { VALUE coverage = Qfalse; VALUE err_info = Qnil; - struct rb_iseq_constant_body *const body = iseq->body; + struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); if (parent && (type == ISEQ_TYPE_MAIN || type == ISEQ_TYPE_TOP)) - err_info = Qfalse; + err_info = Qfalse; body->type = type; set_relation(iseq, parent); @@ -621,7 +621,7 @@ prepare_iseq_build(rb_iseq_t *iseq, name = rb_fstring(name); iseq_location_setup(iseq, name, path, realpath, first_lineno, code_location, node_id); if (iseq != body->local_iseq) { - RB_OBJ_WRITE(iseq, &body->location.base_label, body->local_iseq->body->location.label); + RB_OBJ_WRITE(iseq, &body->location.base_label, ISEQ_BODY(body->local_iseq)->location.label); } ISEQ_COVERAGE_SET(iseq, Qnil); ISEQ_ORIGINAL_ISEQ_CLEAR(iseq); @@ -645,17 +645,16 @@ prepare_iseq_build(rb_iseq_t *iseq, ISEQ_COMPILE_DATA(iseq)->ivar_cache_table = NULL; ISEQ_COMPILE_DATA(iseq)->builtin_function_table = GET_VM()->builtin_function_table; - if (option->coverage_enabled) { - VALUE coverages = rb_get_coverages(); - if (RTEST(coverages)) { - coverage = rb_hash_lookup(coverages, rb_iseq_path(iseq)); - if (NIL_P(coverage)) coverage = Qfalse; - } + VALUE coverages = rb_get_coverages(); + if (RTEST(coverages)) { + coverage = rb_hash_lookup(coverages, rb_iseq_path(iseq)); + if (NIL_P(coverage)) coverage = Qfalse; + } } ISEQ_COVERAGE_SET(iseq, coverage); if (coverage && ISEQ_BRANCH_COVERAGE(iseq)) - ISEQ_PC2BRANCHINDEX_SET(iseq, rb_ary_tmp_new(0)); + ISEQ_PC2BRANCHINDEX_SET(iseq, rb_ary_hidden_new(0)); return Qtrue; } @@ -669,7 +668,7 @@ rb_iseq_insns_info_encode_positions(const rb_iseq_t *iseq) { #if VM_INSN_INFO_TABLE_IMPL == 2 /* create succ_index_table */ - struct rb_iseq_constant_body *const body = iseq->body; + struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); int size = body->insns_info.size; int max_pos = body->iseq_size; int *data = (int *)body->insns_info.positions; @@ -706,7 +705,7 @@ static VALUE finish_iseq_build(rb_iseq_t *iseq) { struct iseq_compile_data *data = ISEQ_COMPILE_DATA(iseq); - const struct rb_iseq_constant_body *const body = iseq->body; + const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); VALUE err = data->err_info; ISEQ_COMPILE_DATA_CLEAR(iseq); compile_data_free(data); @@ -716,33 +715,40 @@ finish_iseq_build(rb_iseq_t *iseq) #endif if (RTEST(err)) { - VALUE path = pathobj_path(body->location.pathobj); - if (err == Qtrue) err = rb_exc_new_cstr(rb_eSyntaxError, "compile error"); - rb_funcallv(err, rb_intern("set_backtrace"), 1, &path); - rb_exc_raise(err); + VALUE path = pathobj_path(body->location.pathobj); + if (err == Qtrue) err = rb_exc_new_cstr(rb_eSyntaxError, "compile error"); + rb_funcallv(err, rb_intern("set_backtrace"), 1, &path); + rb_exc_raise(err); } RB_DEBUG_COUNTER_INC(iseq_num); - RB_DEBUG_COUNTER_ADD(iseq_cd_num, iseq->body->ci_size); + RB_DEBUG_COUNTER_ADD(iseq_cd_num, ISEQ_BODY(iseq)->ci_size); rb_iseq_init_trace(iseq); return Qtrue; } static rb_compile_option_t COMPILE_OPTION_DEFAULT = { - OPT_INLINE_CONST_CACHE, /* int inline_const_cache; */ - OPT_PEEPHOLE_OPTIMIZATION, /* int peephole_optimization; */ - OPT_TAILCALL_OPTIMIZATION, /* int tailcall_optimization */ - OPT_SPECIALISED_INSTRUCTION, /* int specialized_instruction; */ - OPT_OPERANDS_UNIFICATION, /* int operands_unification; */ - OPT_INSTRUCTIONS_UNIFICATION, /* int instructions_unification; */ - OPT_STACK_CACHING, /* int stack_caching; */ - OPT_FROZEN_STRING_LITERAL, - OPT_DEBUG_FROZEN_STRING_LITERAL, - TRUE, /* coverage_enabled */ + .inline_const_cache = OPT_INLINE_CONST_CACHE, + .peephole_optimization = OPT_PEEPHOLE_OPTIMIZATION, + .tailcall_optimization = OPT_TAILCALL_OPTIMIZATION, + .specialized_instruction = OPT_SPECIALISED_INSTRUCTION, + .operands_unification = OPT_OPERANDS_UNIFICATION, + .instructions_unification = OPT_INSTRUCTIONS_UNIFICATION, + .frozen_string_literal = OPT_FROZEN_STRING_LITERAL, + .debug_frozen_string_literal = OPT_DEBUG_FROZEN_STRING_LITERAL, + .coverage_enabled = TRUE, }; -static const rb_compile_option_t COMPILE_OPTION_FALSE = {0}; +static const rb_compile_option_t COMPILE_OPTION_FALSE = { + .frozen_string_literal = -1, // unspecified +}; + +int +rb_iseq_opt_frozen_string_literal(void) +{ + return COMPILE_OPTION_DEFAULT.frozen_string_literal; +} static void set_compile_option_from_hash(rb_compile_option_t *option, VALUE opt) @@ -753,7 +759,7 @@ set_compile_option_from_hash(rb_compile_option_t *option, VALUE opt) else if (flag == Qfalse) { (o)->mem = 0; } \ } #define SET_COMPILE_OPTION_NUM(o, h, mem) \ - { VALUE num = rb_hash_aref(opt, ID2SYM(rb_intern(#mem))); \ + { VALUE num = rb_hash_aref((h), ID2SYM(rb_intern(#mem))); \ if (!NIL_P(num)) (o)->mem = NUM2INT(num); \ } SET_COMPILE_OPTION(option, opt, inline_const_cache); @@ -762,7 +768,6 @@ set_compile_option_from_hash(rb_compile_option_t *option, VALUE opt) SET_COMPILE_OPTION(option, opt, specialized_instruction); SET_COMPILE_OPTION(option, opt, operands_unification); SET_COMPILE_OPTION(option, opt, instructions_unification); - SET_COMPILE_OPTION(option, opt, stack_caching); SET_COMPILE_OPTION(option, opt, frozen_string_literal); SET_COMPILE_OPTION(option, opt, debug_frozen_string_literal); SET_COMPILE_OPTION(option, opt, coverage_enabled); @@ -771,33 +776,39 @@ set_compile_option_from_hash(rb_compile_option_t *option, VALUE opt) #undef SET_COMPILE_OPTION_NUM } -static void -rb_iseq_make_compile_option(rb_compile_option_t *option, VALUE opt) +static rb_compile_option_t * +set_compile_option_from_ast(rb_compile_option_t *option, const rb_ast_body_t *ast) { - Check_Type(opt, T_HASH); - set_compile_option_from_hash(option, opt); +#define SET_COMPILE_OPTION(o, a, mem) \ + ((a)->mem < 0 ? 0 : ((o)->mem = (a)->mem > 0)) + SET_COMPILE_OPTION(option, ast, coverage_enabled); +#undef SET_COMPILE_OPTION + if (ast->frozen_string_literal >= 0) { + option->frozen_string_literal = ast->frozen_string_literal; + } + return option; } static void make_compile_option(rb_compile_option_t *option, VALUE opt) { if (NIL_P(opt)) { - *option = COMPILE_OPTION_DEFAULT; + *option = COMPILE_OPTION_DEFAULT; } else if (opt == Qfalse) { - *option = COMPILE_OPTION_FALSE; + *option = COMPILE_OPTION_FALSE; } else if (opt == Qtrue) { - int i; - for (i = 0; i < (int)(sizeof(rb_compile_option_t) / sizeof(int)); ++i) - ((int *)option)[i] = 1; + int i; + for (i = 0; i < (int)(sizeof(rb_compile_option_t) / sizeof(int)); ++i) + ((int *)option)[i] = 1; } else if (RB_TYPE_P(opt, T_HASH)) { - *option = COMPILE_OPTION_DEFAULT; - set_compile_option_from_hash(option, opt); + *option = COMPILE_OPTION_DEFAULT; + set_compile_option_from_hash(option, opt); } else { - rb_raise(rb_eTypeError, "Compile option must be Hash/true/false/nil"); + rb_raise(rb_eTypeError, "Compile option must be Hash/true/false/nil"); } } @@ -810,73 +821,138 @@ make_compile_option_value(rb_compile_option_t *option) #define SET_COMPILE_OPTION_NUM(o, h, mem) \ rb_hash_aset((h), ID2SYM(rb_intern(#mem)), INT2NUM((o)->mem)) { - SET_COMPILE_OPTION(option, opt, inline_const_cache); - SET_COMPILE_OPTION(option, opt, peephole_optimization); - SET_COMPILE_OPTION(option, opt, tailcall_optimization); - SET_COMPILE_OPTION(option, opt, specialized_instruction); - SET_COMPILE_OPTION(option, opt, operands_unification); - SET_COMPILE_OPTION(option, opt, instructions_unification); - SET_COMPILE_OPTION(option, opt, stack_caching); - SET_COMPILE_OPTION(option, opt, frozen_string_literal); - SET_COMPILE_OPTION(option, opt, debug_frozen_string_literal); - SET_COMPILE_OPTION(option, opt, coverage_enabled); - SET_COMPILE_OPTION_NUM(option, opt, debug_level); + SET_COMPILE_OPTION(option, opt, inline_const_cache); + SET_COMPILE_OPTION(option, opt, peephole_optimization); + SET_COMPILE_OPTION(option, opt, tailcall_optimization); + SET_COMPILE_OPTION(option, opt, specialized_instruction); + SET_COMPILE_OPTION(option, opt, operands_unification); + SET_COMPILE_OPTION(option, opt, instructions_unification); + SET_COMPILE_OPTION(option, opt, debug_frozen_string_literal); + SET_COMPILE_OPTION(option, opt, coverage_enabled); + SET_COMPILE_OPTION_NUM(option, opt, debug_level); } #undef SET_COMPILE_OPTION #undef SET_COMPILE_OPTION_NUM + VALUE frozen_string_literal = option->frozen_string_literal == -1 ? Qnil : RBOOL(option->frozen_string_literal); + rb_hash_aset(opt, ID2SYM(rb_intern("frozen_string_literal")), frozen_string_literal); return opt; } rb_iseq_t * -rb_iseq_new(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath, - const rb_iseq_t *parent, enum iseq_type type) +rb_iseq_new(const VALUE ast_value, VALUE name, VALUE path, VALUE realpath, + const rb_iseq_t *parent, enum rb_iseq_type type) { - return rb_iseq_new_with_opt(ast, name, path, realpath, INT2FIX(0), parent, - 0, type, &COMPILE_OPTION_DEFAULT); + return rb_iseq_new_with_opt(ast_value, name, path, realpath, 0, parent, + 0, type, &COMPILE_OPTION_DEFAULT, + Qnil); } static int -ast_line_count(const rb_ast_body_t *ast) +ast_line_count(const VALUE ast_value) { - if (ast->script_lines == Qfalse) { - // this occurs when failed to parse the source code with a syntax error - return 0; - } - if (RB_TYPE_P(ast->script_lines, T_ARRAY)){ - return (int)RARRAY_LEN(ast->script_lines); + rb_ast_t *ast = rb_ruby_ast_data_get(ast_value); + return ast->body.line_count; +} + +static VALUE +iseq_setup_coverage(VALUE coverages, VALUE path, int line_count) +{ + if (line_count >= 0) { + int len = (rb_get_coverage_mode() & COVERAGE_TARGET_ONESHOT_LINES) ? 0 : line_count; + + VALUE coverage = rb_default_coverage(len); + rb_hash_aset(coverages, path, coverage); + + return coverage; } - return FIX2INT(ast->script_lines); + + return Qnil; } -rb_iseq_t * -rb_iseq_new_top(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath, const rb_iseq_t *parent) +static inline void +iseq_new_setup_coverage(VALUE path, int line_count) { VALUE coverages = rb_get_coverages(); + if (RTEST(coverages)) { - int line_count = ast_line_count(ast); - if (line_count >= 0) { - int len = (rb_get_coverage_mode() & COVERAGE_TARGET_ONESHOT_LINES) ? 0 : line_count; - VALUE coverage = rb_default_coverage(len); - rb_hash_aset(coverages, path, coverage); - } + iseq_setup_coverage(coverages, path, line_count); } +} + +rb_iseq_t * +rb_iseq_new_top(const VALUE ast_value, VALUE name, VALUE path, VALUE realpath, const rb_iseq_t *parent) +{ + iseq_new_setup_coverage(path, ast_line_count(ast_value)); + + return rb_iseq_new_with_opt(ast_value, name, path, realpath, 0, parent, 0, + ISEQ_TYPE_TOP, &COMPILE_OPTION_DEFAULT, + Qnil); +} + +/** + * The main entry-point into the prism compiler when a file is required. + */ +rb_iseq_t * +pm_iseq_new_top(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpath, const rb_iseq_t *parent) +{ + iseq_new_setup_coverage(path, (int) (node->parser->newline_list.size - 1)); - return rb_iseq_new_with_opt(ast, name, path, realpath, INT2FIX(0), parent, 0, + return pm_iseq_new_with_opt(node, name, path, realpath, 0, parent, 0, ISEQ_TYPE_TOP, &COMPILE_OPTION_DEFAULT); } rb_iseq_t * -rb_iseq_new_main(const rb_ast_body_t *ast, VALUE path, VALUE realpath, const rb_iseq_t *parent) +rb_iseq_new_main(const VALUE ast_value, VALUE path, VALUE realpath, const rb_iseq_t *parent, int opt) +{ + iseq_new_setup_coverage(path, ast_line_count(ast_value)); + + return rb_iseq_new_with_opt(ast_value, rb_fstring_lit("<main>"), + path, realpath, 0, + parent, 0, ISEQ_TYPE_MAIN, opt ? &COMPILE_OPTION_DEFAULT : &COMPILE_OPTION_FALSE, + Qnil); +} + +/** + * The main entry-point into the prism compiler when a file is executed as the + * main file in the program. + */ +rb_iseq_t * +pm_iseq_new_main(pm_scope_node_t *node, VALUE path, VALUE realpath, const rb_iseq_t *parent, int opt) +{ + iseq_new_setup_coverage(path, (int) (node->parser->newline_list.size - 1)); + + return pm_iseq_new_with_opt(node, rb_fstring_lit("<main>"), + path, realpath, 0, + parent, 0, ISEQ_TYPE_MAIN, opt ? &COMPILE_OPTION_DEFAULT : &COMPILE_OPTION_FALSE); +} + +rb_iseq_t * +rb_iseq_new_eval(const VALUE ast_value, VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_iseq_t *parent, int isolated_depth) { - return rb_iseq_new_with_opt(ast, rb_fstring_lit("<main>"), - path, realpath, INT2FIX(0), - parent, 0, ISEQ_TYPE_MAIN, &COMPILE_OPTION_DEFAULT); + if (rb_get_coverage_mode() & COVERAGE_TARGET_EVAL) { + VALUE coverages = rb_get_coverages(); + if (RTEST(coverages) && RTEST(path) && !RTEST(rb_hash_has_key(coverages, path))) { + iseq_setup_coverage(coverages, path, ast_line_count(ast_value) + first_lineno - 1); + } + } + + return rb_iseq_new_with_opt(ast_value, name, path, realpath, first_lineno, + parent, isolated_depth, ISEQ_TYPE_EVAL, &COMPILE_OPTION_DEFAULT, + Qnil); } rb_iseq_t * -rb_iseq_new_eval(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath, VALUE first_lineno, const rb_iseq_t *parent, int isolated_depth) +pm_iseq_new_eval(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpath, + int first_lineno, const rb_iseq_t *parent, int isolated_depth) { - return rb_iseq_new_with_opt(ast, name, path, realpath, first_lineno, + if (rb_get_coverage_mode() & COVERAGE_TARGET_EVAL) { + VALUE coverages = rb_get_coverages(); + if (RTEST(coverages) && RTEST(path) && !RTEST(rb_hash_has_key(coverages, path))) { + iseq_setup_coverage(coverages, path, ((int) (node->parser->newline_list.size - 1)) + first_lineno - 1); + } + } + + return pm_iseq_new_with_opt(node, name, path, realpath, first_lineno, parent, isolated_depth, ISEQ_TYPE_EVAL, &COMPILE_OPTION_DEFAULT); } @@ -884,45 +960,47 @@ static inline rb_iseq_t * iseq_translate(rb_iseq_t *iseq) { if (rb_respond_to(rb_cISeq, rb_intern("translate"))) { - VALUE v1 = iseqw_new(iseq); - VALUE v2 = rb_funcall(rb_cISeq, rb_intern("translate"), 1, v1); - if (v1 != v2 && CLASS_OF(v2) == rb_cISeq) { - iseq = (rb_iseq_t *)iseqw_check(v2); - } + VALUE v1 = iseqw_new(iseq); + VALUE v2 = rb_funcall(rb_cISeq, rb_intern("translate"), 1, v1); + if (v1 != v2 && CLASS_OF(v2) == rb_cISeq) { + iseq = (rb_iseq_t *)iseqw_check(v2); + } } return iseq; } rb_iseq_t * -rb_iseq_new_with_opt(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath, - VALUE first_lineno, const rb_iseq_t *parent, int isolated_depth, - enum iseq_type type, const rb_compile_option_t *option) -{ - const NODE *node = ast ? ast->root : 0; +rb_iseq_new_with_opt(const VALUE ast_value, VALUE name, VALUE path, VALUE realpath, + int first_lineno, const rb_iseq_t *parent, int isolated_depth, + enum rb_iseq_type type, const rb_compile_option_t *option, + VALUE script_lines) +{ + rb_ast_t *ast = rb_ruby_ast_data_get(ast_value); + rb_ast_body_t *body = ast ? &ast->body : NULL; + const NODE *node = body ? body->root : 0; /* TODO: argument check */ rb_iseq_t *iseq = iseq_alloc(); rb_compile_option_t new_opt; - if (option) { + if (!option) option = &COMPILE_OPTION_DEFAULT; + if (body) { new_opt = *option; + option = set_compile_option_from_ast(&new_opt, body); } - else { - new_opt = COMPILE_OPTION_DEFAULT; - } - if (ast && ast->compile_option) rb_iseq_make_compile_option(&new_opt, ast->compile_option); - - VALUE script_lines = Qnil; - if (ast && !FIXNUM_P(ast->script_lines) && ast->script_lines) { - script_lines = ast->script_lines; + if (!NIL_P(script_lines)) { + // noop + } + else if (body && body->script_lines) { + script_lines = rb_parser_build_script_lines_from(body->script_lines); } else if (parent) { - script_lines = parent->body->variable.script_lines; + script_lines = ISEQ_BODY(parent)->variable.script_lines; } prepare_iseq_build(iseq, name, path, realpath, first_lineno, node ? &node->nd_loc : NULL, node ? nd_node_id(node) : -1, - parent, isolated_depth, type, script_lines, &new_opt); + parent, isolated_depth, type, script_lines, option); rb_iseq_compile_node(iseq, node); finish_iseq_build(iseq); @@ -930,12 +1008,55 @@ rb_iseq_new_with_opt(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE rea return iseq_translate(iseq); } +/** + * This is a step in the prism compiler that is called once all of the various + * options have been established. It is called from one of the pm_iseq_new_* + * functions or from the RubyVM::InstructionSequence APIs. It is responsible for + * allocating the instruction sequence, calling into the compiler, and returning + * the built instruction sequence. + * + * Importantly, this is also the function where the compiler is re-entered to + * compile child instruction sequences. A child instruction sequence is always + * compiled using a scope node, which is why we cast it explicitly to that here + * in the parameters (as opposed to accepting a generic pm_node_t *). + */ +rb_iseq_t * +pm_iseq_new_with_opt(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpath, + int first_lineno, const rb_iseq_t *parent, int isolated_depth, + enum rb_iseq_type type, const rb_compile_option_t *option) +{ + rb_iseq_t *iseq = iseq_alloc(); + ISEQ_BODY(iseq)->prism = true; + ISEQ_BODY(iseq)->param.flags.use_block = true; // unused block warning is not supported yet + + if (!option) option = &COMPILE_OPTION_DEFAULT; + + pm_location_t *location = &node->base.location; + int32_t start_line = node->parser->start_line; + + pm_line_column_t start = pm_newline_list_line_column(&node->parser->newline_list, location->start, start_line); + pm_line_column_t end = pm_newline_list_line_column(&node->parser->newline_list, location->end, start_line); + + rb_code_location_t code_location = (rb_code_location_t) { + .beg_pos = { .lineno = (int) start.line, .column = (int) start.column }, + .end_pos = { .lineno = (int) end.line, .column = (int) end.column } + }; + + prepare_iseq_build(iseq, name, path, realpath, first_lineno, &code_location, -1, + parent, isolated_depth, type, Qnil, option); + + pm_iseq_compile_node(iseq, node); + finish_iseq_build(iseq); + + return iseq_translate(iseq); +} + rb_iseq_t * rb_iseq_new_with_callback( const struct rb_iseq_new_with_callback_callback_func * ifunc, VALUE name, VALUE path, VALUE realpath, - VALUE first_lineno, const rb_iseq_t *parent, - enum iseq_type type, const rb_compile_option_t *option) + int first_lineno, const rb_iseq_t *parent, + enum rb_iseq_type type, const rb_compile_option_t *option) { /* TODO: argument check */ rb_iseq_t *iseq = iseq_alloc(); @@ -955,7 +1076,7 @@ rb_iseq_load_iseq(VALUE fname) VALUE iseqv = rb_check_funcall(rb_cISeq, rb_intern("load_iseq"), 1, &fname); if (!SPECIAL_CONST_P(iseqv) && RBASIC_CLASS(iseqv) == rb_cISeq) { - return iseqw_check(iseqv); + return iseqw_check(iseqv); } return NULL; @@ -967,7 +1088,7 @@ rb_iseq_load_iseq(VALUE fname) #define CHECK_SYMBOL(v) rb_to_symbol_type(v) static inline VALUE CHECK_INTEGER(VALUE v) {(void)NUM2LONG(v); return v;} -static enum iseq_type +static enum rb_iseq_type iseq_type_from_sym(VALUE type) { const ID id_top = rb_intern("top"); @@ -991,7 +1112,7 @@ iseq_type_from_sym(VALUE type) if (typeid == id_eval) return ISEQ_TYPE_EVAL; if (typeid == id_main) return ISEQ_TYPE_MAIN; if (typeid == id_plain) return ISEQ_TYPE_PLAIN; - return (enum iseq_type)-1; + return (enum rb_iseq_type)-1; } static VALUE @@ -1000,7 +1121,7 @@ iseq_load(VALUE data, const rb_iseq_t *parent, VALUE opt) rb_iseq_t *iseq = iseq_alloc(); VALUE magic, version1, version2, format_type, misc; - VALUE name, path, realpath, first_lineno, code_location, node_id; + VALUE name, path, realpath, code_location, node_id; VALUE type, body, locals, params, exception; st_data_t iseq_type; @@ -1026,7 +1147,7 @@ iseq_load(VALUE data, const rb_iseq_t *parent, VALUE opt) path = CHECK_STRING(rb_ary_entry(data, i++)); realpath = rb_ary_entry(data, i++); realpath = NIL_P(realpath) ? Qnil : CHECK_STRING(realpath); - first_lineno = CHECK_INTEGER(rb_ary_entry(data, i++)); + int first_lineno = RB_NUM2INT(rb_ary_entry(data, i++)); type = CHECK_SYMBOL(rb_ary_entry(data, i++)); locals = CHECK_ARRAY(rb_ary_entry(data, i++)); @@ -1034,27 +1155,31 @@ iseq_load(VALUE data, const rb_iseq_t *parent, VALUE opt) exception = CHECK_ARRAY(rb_ary_entry(data, i++)); body = CHECK_ARRAY(rb_ary_entry(data, i++)); - iseq->body->local_iseq = iseq; + ISEQ_BODY(iseq)->local_iseq = iseq; iseq_type = iseq_type_from_sym(type); - if (iseq_type == (enum iseq_type)-1) { - rb_raise(rb_eTypeError, "unsupported type: :%"PRIsVALUE, rb_sym2str(type)); + if (iseq_type == (enum rb_iseq_type)-1) { + rb_raise(rb_eTypeError, "unsupported type: :%"PRIsVALUE, rb_sym2str(type)); } node_id = rb_hash_aref(misc, ID2SYM(rb_intern("node_id"))); code_location = rb_hash_aref(misc, ID2SYM(rb_intern("code_location"))); if (RB_TYPE_P(code_location, T_ARRAY) && RARRAY_LEN(code_location) == 4) { - tmp_loc.beg_pos.lineno = NUM2INT(rb_ary_entry(code_location, 0)); - tmp_loc.beg_pos.column = NUM2INT(rb_ary_entry(code_location, 1)); - tmp_loc.end_pos.lineno = NUM2INT(rb_ary_entry(code_location, 2)); - tmp_loc.end_pos.column = NUM2INT(rb_ary_entry(code_location, 3)); + tmp_loc.beg_pos.lineno = NUM2INT(rb_ary_entry(code_location, 0)); + tmp_loc.beg_pos.column = NUM2INT(rb_ary_entry(code_location, 1)); + tmp_loc.end_pos.lineno = NUM2INT(rb_ary_entry(code_location, 2)); + tmp_loc.end_pos.column = NUM2INT(rb_ary_entry(code_location, 3)); + } + + if (SYM2ID(rb_hash_aref(misc, ID2SYM(rb_intern("parser")))) == rb_intern("prism")) { + ISEQ_BODY(iseq)->prism = true; } make_compile_option(&option, opt); option.peephole_optimization = FALSE; /* because peephole optimization can modify original iseq */ prepare_iseq_build(iseq, name, path, realpath, first_lineno, &tmp_loc, NUM2INT(node_id), - parent, 0, (enum iseq_type)iseq_type, Qnil, &option); + parent, 0, (enum rb_iseq_type)iseq_type, Qnil, &option); rb_iseq_build_from_ary(iseq, misc, locals, params, exception, body); @@ -1090,40 +1215,97 @@ rb_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, V #else # define INITIALIZED /* volatile */ #endif - rb_ast_t *(*parse)(VALUE vparser, VALUE fname, VALUE file, int start); + VALUE (*parse)(VALUE vparser, VALUE fname, VALUE file, int start); int ln; - rb_ast_t *INITIALIZED ast; + VALUE INITIALIZED ast_value; + rb_ast_t *ast; + VALUE name = rb_fstring_lit("<compiled>"); /* safe results first */ make_compile_option(&option, opt); ln = NUM2INT(line); StringValueCStr(file); if (RB_TYPE_P(src, T_FILE)) { - parse = rb_parser_compile_file_path; + parse = rb_parser_compile_file_path; } else { - parse = rb_parser_compile_string_path; - StringValue(src); + parse = rb_parser_compile_string_path; + StringValue(src); } { - const VALUE parser = rb_parser_new(); - VALUE name = rb_fstring_lit("<compiled>"); - const rb_iseq_t *outer_scope = rb_iseq_new(NULL, name, name, Qnil, 0, ISEQ_TYPE_TOP); + const VALUE parser = rb_parser_new(); + const rb_iseq_t *outer_scope = rb_iseq_new(Qnil, name, name, Qnil, 0, ISEQ_TYPE_TOP); VALUE outer_scope_v = (VALUE)outer_scope; rb_parser_set_context(parser, outer_scope, FALSE); + if (ruby_vm_keep_script_lines) rb_parser_set_script_lines(parser); RB_GC_GUARD(outer_scope_v); - ast = (*parse)(parser, file, src, ln); + ast_value = (*parse)(parser, file, src, ln); } - if (!ast->body.root) { - rb_ast_dispose(ast); - rb_exc_raise(GET_EC()->errinfo); + ast = rb_ruby_ast_data_get(ast_value); + + if (!ast || !ast->body.root) { + rb_ast_dispose(ast); + rb_exc_raise(GET_EC()->errinfo); } else { - INITIALIZED VALUE label = rb_fstring_lit("<compiled>"); - iseq = rb_iseq_new_with_opt(&ast->body, label, file, realpath, line, - NULL, 0, ISEQ_TYPE_TOP, &option); - rb_ast_dispose(ast); + iseq = rb_iseq_new_with_opt(ast_value, name, file, realpath, ln, + NULL, 0, ISEQ_TYPE_TOP, &option, + Qnil); + rb_ast_dispose(ast); + } + + return iseq; +} + +static rb_iseq_t * +pm_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, VALUE opt) +{ + rb_iseq_t *iseq = NULL; + rb_compile_option_t option; + int ln; + VALUE name = rb_fstring_lit("<compiled>"); + + /* safe results first */ + make_compile_option(&option, opt); + ln = NUM2INT(line); + StringValueCStr(file); + + pm_parse_result_t result = { 0 }; + pm_options_line_set(&result.options, NUM2INT(line)); + + switch (option.frozen_string_literal) { + case ISEQ_FROZEN_STRING_LITERAL_UNSET: + break; + case ISEQ_FROZEN_STRING_LITERAL_DISABLED: + pm_options_frozen_string_literal_set(&result.options, false); + break; + case ISEQ_FROZEN_STRING_LITERAL_ENABLED: + pm_options_frozen_string_literal_set(&result.options, true); + break; + default: + rb_bug("pm_iseq_compile_with_option: invalid frozen_string_literal=%d", option.frozen_string_literal); + break; + } + + VALUE error; + if (RB_TYPE_P(src, T_FILE)) { + VALUE filepath = rb_io_path(src); + error = pm_load_parse_file(&result, filepath); + RB_GC_GUARD(filepath); + } + else { + src = StringValue(src); + error = pm_parse_string(&result, src, file); + } + + if (error == Qnil) { + iseq = pm_iseq_new_with_opt(&result.node, name, file, realpath, ln, NULL, 0, ISEQ_TYPE_TOP, &option); + pm_parse_result_free(&result); + } + else { + pm_parse_result_free(&result); + rb_exc_raise(error); } return iseq; @@ -1132,13 +1314,13 @@ rb_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, V VALUE rb_iseq_path(const rb_iseq_t *iseq) { - return pathobj_path(iseq->body->location.pathobj); + return pathobj_path(ISEQ_BODY(iseq)->location.pathobj); } VALUE rb_iseq_realpath(const rb_iseq_t *iseq) { - return pathobj_realpath(iseq->body->location.pathobj); + return pathobj_realpath(ISEQ_BODY(iseq)->location.pathobj); } VALUE @@ -1156,44 +1338,52 @@ rb_iseq_from_eval_p(const rb_iseq_t *iseq) VALUE rb_iseq_label(const rb_iseq_t *iseq) { - return iseq->body->location.label; + return ISEQ_BODY(iseq)->location.label; } VALUE rb_iseq_base_label(const rb_iseq_t *iseq) { - return iseq->body->location.base_label; + return ISEQ_BODY(iseq)->location.base_label; } VALUE rb_iseq_first_lineno(const rb_iseq_t *iseq) { - return iseq->body->location.first_lineno; + return RB_INT2NUM(ISEQ_BODY(iseq)->location.first_lineno); } VALUE rb_iseq_method_name(const rb_iseq_t *iseq) { - struct rb_iseq_constant_body *const body = iseq->body->local_iseq->body; + struct rb_iseq_constant_body *const body = ISEQ_BODY(ISEQ_BODY(iseq)->local_iseq); if (body->type == ISEQ_TYPE_METHOD) { - return body->location.base_label; + return body->location.base_label; } else { - return Qnil; + return Qnil; } } void rb_iseq_code_location(const rb_iseq_t *iseq, int *beg_pos_lineno, int *beg_pos_column, int *end_pos_lineno, int *end_pos_column) { - const rb_code_location_t *loc = &iseq->body->location.code_location; + const rb_code_location_t *loc = &ISEQ_BODY(iseq)->location.code_location; if (beg_pos_lineno) *beg_pos_lineno = loc->beg_pos.lineno; if (beg_pos_column) *beg_pos_column = loc->beg_pos.column; if (end_pos_lineno) *end_pos_lineno = loc->end_pos.lineno; if (end_pos_column) *end_pos_column = loc->end_pos.column; } +static ID iseq_type_id(enum rb_iseq_type type); + +VALUE +rb_iseq_type(const rb_iseq_t *iseq) +{ + return ID2SYM(iseq_type_id(ISEQ_BODY(iseq)->type)); +} + VALUE rb_iseq_coverage(const rb_iseq_t *iseq) { @@ -1208,10 +1398,10 @@ remove_coverage_i(void *vstart, void *vend, size_t stride, void *data) void *ptr = asan_poisoned_object_p(v); asan_unpoison_object(v, false); - if (rb_obj_is_iseq(v)) { + if (rb_obj_is_iseq(v)) { rb_iseq_t *iseq = (rb_iseq_t *)v; ISEQ_COVERAGE_SET(iseq, Qnil); - } + } asan_poison_object_if(ptr, v); } @@ -1229,18 +1419,30 @@ rb_iseq_remove_coverage_all(void) static void iseqw_mark(void *ptr) { - rb_gc_mark((VALUE)ptr); + rb_gc_mark_movable(*(VALUE *)ptr); } static size_t iseqw_memsize(const void *ptr) { - return rb_iseq_memsize((const rb_iseq_t *)ptr); + return rb_iseq_memsize(*(const rb_iseq_t **)ptr); +} + +static void +iseqw_ref_update(void *ptr) +{ + VALUE *vptr = ptr; + *vptr = rb_gc_location(*vptr); } static const rb_data_type_t iseqw_data_type = { "T_IMEMO/iseq", - {iseqw_mark, NULL, iseqw_memsize,}, + { + iseqw_mark, + RUBY_TYPED_DEFAULT_FREE, + iseqw_memsize, + iseqw_ref_update, + }, 0, 0, RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED }; @@ -1248,14 +1450,16 @@ static VALUE iseqw_new(const rb_iseq_t *iseq) { if (iseq->wrapper) { + if (*(const rb_iseq_t **)rb_check_typeddata(iseq->wrapper, &iseqw_data_type) != iseq) { + rb_raise(rb_eTypeError, "wrong iseq wrapper: %" PRIsVALUE " for %p", + iseq->wrapper, (void *)iseq); + } return iseq->wrapper; } else { - union { const rb_iseq_t *in; void *out; } deconst; - VALUE obj; - deconst.in = iseq; - obj = TypedData_Wrap_Struct(rb_cISeq, &iseqw_data_type, deconst.out); - RB_OBJ_WRITTEN(obj, Qundef, iseq); + rb_iseq_t **ptr; + VALUE obj = TypedData_Make_Struct(rb_cISeq, rb_iseq_t *, &iseqw_data_type, ptr); + RB_OBJ_WRITE(obj, ptr, iseq); /* cache a wrapper object */ RB_OBJ_WRITE((VALUE)iseq, &iseq->wrapper, obj); @@ -1271,13 +1475,51 @@ rb_iseqw_new(const rb_iseq_t *iseq) return iseqw_new(iseq); } +/** + * Accept the options given to InstructionSequence.compile and + * InstructionSequence.compile_prism and share the logic for creating the + * instruction sequence. + */ +static VALUE +iseqw_s_compile_parser(int argc, VALUE *argv, VALUE self, bool prism) +{ + VALUE src, file = Qnil, path = Qnil, line = Qnil, opt = Qnil; + int i; + + i = rb_scan_args(argc, argv, "1*:", &src, NULL, &opt); + if (i > 4+NIL_P(opt)) rb_error_arity(argc, 1, 5); + switch (i) { + case 5: opt = argv[--i]; + case 4: line = argv[--i]; + case 3: path = argv[--i]; + case 2: file = argv[--i]; + } + + if (NIL_P(file)) file = rb_fstring_lit("<compiled>"); + if (NIL_P(path)) path = file; + if (NIL_P(line)) line = INT2FIX(1); + + Check_Type(path, T_STRING); + Check_Type(file, T_STRING); + + rb_iseq_t *iseq; + if (prism) { + iseq = pm_iseq_compile_with_option(src, file, path, line, opt); + } + else { + iseq = rb_iseq_compile_with_option(src, file, path, line, opt); + } + + return iseqw_new(iseq); +} + /* * call-seq: * InstructionSequence.compile(source[, file[, path[, line[, options]]]]) -> iseq * InstructionSequence.new(source[, file[, path[, line[, options]]]]) -> iseq * - * Takes +source+, a String of Ruby code and compiles it to an - * InstructionSequence. + * Takes +source+, which can be a string of Ruby code, or an open +File+ object. + * that contains Ruby source code. * * Optionally takes +file+, +path+, and +line+ which describe the file path, * real path and first line number of the ruby code in +source+ which are @@ -1299,6 +1541,10 @@ rb_iseqw_new(const rb_iseq_t *iseq) * RubyVM::InstructionSequence.compile(File.read(path), path, File.expand_path(path)) * #=> <RubyVM::InstructionSequence:<compiled>@test.rb:1> * + * file = File.open("test.rb") + * RubyVM::InstructionSequence.compile(file) + * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1> + * * path = File.expand_path("test.rb") * RubyVM::InstructionSequence.compile(File.read(path), path, path) * #=> <RubyVM::InstructionSequence:<compiled>@/absolute/path/to/test.rb:1> @@ -1307,26 +1553,49 @@ rb_iseqw_new(const rb_iseq_t *iseq) static VALUE iseqw_s_compile(int argc, VALUE *argv, VALUE self) { - VALUE src, file = Qnil, path = Qnil, line = INT2FIX(1), opt = Qnil; - int i; - - i = rb_scan_args(argc, argv, "1*:", &src, NULL, &opt); - if (i > 4+NIL_P(opt)) rb_error_arity(argc, 1, 5); - switch (i) { - case 5: opt = argv[--i]; - case 4: line = argv[--i]; - case 3: path = argv[--i]; - case 2: file = argv[--i]; - } - - if (NIL_P(file)) file = rb_fstring_lit("<compiled>"); - if (NIL_P(path)) path = file; - if (NIL_P(line)) line = INT2FIX(1); - - Check_Type(path, T_STRING); - Check_Type(file, T_STRING); + return iseqw_s_compile_parser(argc, argv, self, *rb_ruby_prism_ptr()); +} - return iseqw_new(rb_iseq_compile_with_option(src, file, path, line, opt)); +/* + * call-seq: + * InstructionSequence.compile_prism(source[, file[, path[, line[, options]]]]) -> iseq + * + * Takes +source+, which can be a string of Ruby code, or an open +File+ object. + * that contains Ruby source code. It parses and compiles using prism. + * + * Optionally takes +file+, +path+, and +line+ which describe the file path, + * real path and first line number of the ruby code in +source+ which are + * metadata attached to the returned +iseq+. + * + * +file+ is used for `__FILE__` and exception backtrace. +path+ is used for + * +require_relative+ base. It is recommended these should be the same full + * path. + * + * +options+, which can be +true+, +false+ or a +Hash+, is used to + * modify the default behavior of the Ruby iseq compiler. + * + * For details regarding valid compile options see ::compile_option=. + * + * RubyVM::InstructionSequence.compile("a = 1 + 2") + * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>> + * + * path = "test.rb" + * RubyVM::InstructionSequence.compile(File.read(path), path, File.expand_path(path)) + * #=> <RubyVM::InstructionSequence:<compiled>@test.rb:1> + * + * file = File.open("test.rb") + * RubyVM::InstructionSequence.compile(file) + * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1> + * + * path = File.expand_path("test.rb") + * RubyVM::InstructionSequence.compile(File.read(path), path, path) + * #=> <RubyVM::InstructionSequence:<compiled>@/absolute/path/to/test.rb:1> + * + */ +static VALUE +iseqw_s_compile_prism(int argc, VALUE *argv, VALUE self) +{ + return iseqw_s_compile_parser(argc, argv, self, true); } /* @@ -1352,9 +1621,10 @@ iseqw_s_compile(int argc, VALUE *argv, VALUE self) static VALUE iseqw_s_compile_file(int argc, VALUE *argv, VALUE self) { - VALUE file, line = INT2FIX(1), opt = Qnil; + VALUE file, opt = Qnil; VALUE parser, f, exc = Qnil, ret; rb_ast_t *ast; + VALUE ast_value; rb_compile_option_t option; int i; @@ -1368,29 +1638,100 @@ iseqw_s_compile_file(int argc, VALUE *argv, VALUE self) f = rb_file_open_str(file, "r"); + rb_execution_context_t *ec = GET_EC(); + VALUE v = rb_vm_push_frame_fname(ec, file); + parser = rb_parser_new(); rb_parser_set_context(parser, NULL, FALSE); - ast = (rb_ast_t *)rb_parser_load_file(parser, file); + ast_value = rb_parser_load_file(parser, file); + ast = rb_ruby_ast_data_get(ast_value); if (!ast->body.root) exc = GET_EC()->errinfo; rb_io_close(f); if (!ast->body.root) { - rb_ast_dispose(ast); - rb_exc_raise(exc); + rb_ast_dispose(ast); + rb_exc_raise(exc); } make_compile_option(&option, opt); - ret = iseqw_new(rb_iseq_new_with_opt(&ast->body, rb_fstring_lit("<main>"), - file, - rb_realpath_internal(Qnil, file, 1), - line, NULL, 0, ISEQ_TYPE_TOP, &option)); + ret = iseqw_new(rb_iseq_new_with_opt(ast_value, rb_fstring_lit("<main>"), + file, + rb_realpath_internal(Qnil, file, 1), + 1, NULL, 0, ISEQ_TYPE_TOP, &option, + Qnil)); rb_ast_dispose(ast); + + rb_vm_pop_frame(ec); + RB_GC_GUARD(v); return ret; } /* * call-seq: + * InstructionSequence.compile_file_prism(file[, options]) -> iseq + * + * Takes +file+, a String with the location of a Ruby source file, reads, + * parses and compiles the file, and returns +iseq+, the compiled + * InstructionSequence with source location metadata set. It parses and + * compiles using prism. + * + * Optionally takes +options+, which can be +true+, +false+ or a +Hash+, to + * modify the default behavior of the Ruby iseq compiler. + * + * For details regarding valid compile options see ::compile_option=. + * + * # /tmp/hello.rb + * puts "Hello, world!" + * + * # elsewhere + * RubyVM::InstructionSequence.compile_file_prism("/tmp/hello.rb") + * #=> <RubyVM::InstructionSequence:<main>@/tmp/hello.rb> + */ +static VALUE +iseqw_s_compile_file_prism(int argc, VALUE *argv, VALUE self) +{ + VALUE file, opt = Qnil, ret; + rb_compile_option_t option; + int i; + + i = rb_scan_args(argc, argv, "1*:", &file, NULL, &opt); + if (i > 1+NIL_P(opt)) rb_error_arity(argc, 1, 2); + switch (i) { + case 2: opt = argv[--i]; + } + FilePathValue(file); + file = rb_fstring(file); /* rb_io_t->pathv gets frozen anyways */ + + rb_execution_context_t *ec = GET_EC(); + VALUE v = rb_vm_push_frame_fname(ec, file); + + pm_parse_result_t result = { 0 }; + result.options.line = 1; + + VALUE error = pm_load_parse_file(&result, file); + + if (error == Qnil) { + make_compile_option(&option, opt); + + ret = iseqw_new(pm_iseq_new_with_opt(&result.node, rb_fstring_lit("<main>"), + file, + rb_realpath_internal(Qnil, file, 1), + 1, NULL, 0, ISEQ_TYPE_TOP, &option)); + pm_parse_result_free(&result); + rb_vm_pop_frame(ec); + RB_GC_GUARD(v); + return ret; + } else { + pm_parse_result_free(&result); + rb_vm_pop_frame(ec); + RB_GC_GUARD(v); + rb_exc_raise(error); + } +} + +/* + * call-seq: * InstructionSequence.compile_option = options * * Sets the default values for various optimizations in the Ruby iseq @@ -1411,7 +1752,6 @@ iseqw_s_compile_file(int argc, VALUE *argv, VALUE self) * * +:operands_unification+ * * +:peephole_optimization+ * * +:specialized_instruction+ - * * +:stack_caching+ * * +:tailcall_optimization+ * * Additionally, +:debug_level+ can be set to an integer. @@ -1446,14 +1786,16 @@ iseqw_s_compile_option_get(VALUE self) static const rb_iseq_t * iseqw_check(VALUE iseqw) { - rb_iseq_t *iseq = DATA_PTR(iseqw); + rb_iseq_t **iseq_ptr; + TypedData_Get_Struct(iseqw, rb_iseq_t *, &iseqw_data_type, iseq_ptr); + rb_iseq_t *iseq = *iseq_ptr; - if (!iseq->body) { - rb_ibf_load_iseq_complete(iseq); + if (!ISEQ_BODY(iseq)) { + rb_ibf_load_iseq_complete(iseq); } - if (!iseq->body->location.label) { - rb_raise(rb_eTypeError, "uninitialized InstructionSequence"); + if (!ISEQ_BODY(iseq)->location.label) { + rb_raise(rb_eTypeError, "uninitialized InstructionSequence"); } return iseq; } @@ -1486,17 +1828,17 @@ static VALUE iseqw_inspect(VALUE self) { const rb_iseq_t *iseq = iseqw_check(self); - const struct rb_iseq_constant_body *const body = iseq->body; + const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); VALUE klass = rb_class_name(rb_obj_class(self)); if (!body->location.label) { - return rb_sprintf("#<%"PRIsVALUE": uninitialized>", klass); + return rb_sprintf("#<%"PRIsVALUE": uninitialized>", klass); } else { - return rb_sprintf("<%"PRIsVALUE":%"PRIsVALUE"@%"PRIsVALUE":%d>", - klass, - body->location.label, rb_iseq_path(iseq), - FIX2INT(rb_iseq_first_lineno(iseq))); + return rb_sprintf("<%"PRIsVALUE":%"PRIsVALUE"@%"PRIsVALUE":%d>", + klass, + body->location.label, rb_iseq_path(iseq), + FIX2INT(rb_iseq_first_lineno(iseq))); } } @@ -1716,45 +2058,45 @@ iseqw_to_a(VALUE self) static const struct iseq_insn_info_entry * get_insn_info_binary_search(const rb_iseq_t *iseq, size_t pos) { - const struct rb_iseq_constant_body *const body = iseq->body; + const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); size_t size = body->insns_info.size; const struct iseq_insn_info_entry *insns_info = body->insns_info.body; const unsigned int *positions = body->insns_info.positions; const int debug = 0; if (debug) { - printf("size: %"PRIuSIZE"\n", size); - printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n", - (size_t)0, positions[0], insns_info[0].line_no, pos); + printf("size: %"PRIuSIZE"\n", size); + printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n", + (size_t)0, positions[0], insns_info[0].line_no, pos); } if (size == 0) { - return NULL; + return NULL; } else if (size == 1) { - return &insns_info[0]; + return &insns_info[0]; } else { - size_t l = 1, r = size - 1; - while (l <= r) { - size_t m = l + (r - l) / 2; - if (positions[m] == pos) { - return &insns_info[m]; - } - if (positions[m] < pos) { - l = m + 1; - } - else { - r = m - 1; - } - } - if (l >= size) { - return &insns_info[size-1]; - } - if (positions[l] > pos) { - return &insns_info[l-1]; - } - return &insns_info[l]; + size_t l = 1, r = size - 1; + while (l <= r) { + size_t m = l + (r - l) / 2; + if (positions[m] == pos) { + return &insns_info[m]; + } + if (positions[m] < pos) { + l = m + 1; + } + else { + r = m - 1; + } + } + if (l >= size) { + return &insns_info[size-1]; + } + if (positions[l] > pos) { + return &insns_info[l-1]; + } + return &insns_info[l]; } } @@ -1769,7 +2111,7 @@ get_insn_info(const rb_iseq_t *iseq, size_t pos) static const struct iseq_insn_info_entry * get_insn_info_succinct_bitvector(const rb_iseq_t *iseq, size_t pos) { - const struct rb_iseq_constant_body *const body = iseq->body; + const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); size_t size = body->insns_info.size; const struct iseq_insn_info_entry *insns_info = body->insns_info.body; const int debug = 0; @@ -1788,16 +2130,16 @@ get_insn_info_succinct_bitvector(const rb_iseq_t *iseq, size_t pos) } if (size == 0) { - return NULL; + return NULL; } else if (size == 1) { - return &insns_info[0]; + return &insns_info[0]; } else { - int index; - VM_ASSERT(body->insns_info.succ_index_table != NULL); - index = succ_index_lookup(body->insns_info.succ_index_table, (int)pos); - return &insns_info[index-1]; + int index; + VM_ASSERT(body->insns_info.succ_index_table != NULL); + index = succ_index_lookup(body->insns_info.succ_index_table, (int)pos); + return &insns_info[index-1]; } } @@ -1812,36 +2154,36 @@ get_insn_info(const rb_iseq_t *iseq, size_t pos) static const struct iseq_insn_info_entry * get_insn_info_linear_search(const rb_iseq_t *iseq, size_t pos) { - const struct rb_iseq_constant_body *const body = iseq->body; + const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); size_t i = 0, size = body->insns_info.size; const struct iseq_insn_info_entry *insns_info = body->insns_info.body; const unsigned int *positions = body->insns_info.positions; const int debug = 0; if (debug) { - printf("size: %"PRIuSIZE"\n", size); - printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n", - i, positions[i], insns_info[i].line_no, pos); + printf("size: %"PRIuSIZE"\n", size); + printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n", + i, positions[i], insns_info[i].line_no, pos); } if (size == 0) { - return NULL; + return NULL; } else if (size == 1) { - return &insns_info[0]; + return &insns_info[0]; } else { - for (i=1; i<size; i++) { - if (debug) printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n", - i, positions[i], insns_info[i].line_no, pos); - - if (positions[i] == pos) { - return &insns_info[i]; - } - if (positions[i] > pos) { - return &insns_info[i-1]; - } - } + for (i=1; i<size; i++) { + if (debug) printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n", + i, positions[i], insns_info[i].line_no, pos); + + if (positions[i] == pos) { + return &insns_info[i]; + } + if (positions[i] > pos) { + return &insns_info[i-1]; + } + } } return &insns_info[i-1]; } @@ -1859,12 +2201,12 @@ get_insn_info(const rb_iseq_t *iseq, size_t pos) static void validate_get_insn_info(const rb_iseq_t *iseq) { - const struct rb_iseq_constant_body *const body = iseq->body; + const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); size_t i; for (i = 0; i < body->iseq_size; i++) { - if (get_insn_info_linear_search(iseq, i) != get_insn_info(iseq, i)) { - rb_bug("validate_get_insn_info: get_insn_info_linear_search(iseq, %"PRIuSIZE") != get_insn_info(iseq, %"PRIuSIZE")", i, i); - } + if (get_insn_info_linear_search(iseq, i) != get_insn_info(iseq, i)) { + rb_bug("validate_get_insn_info: get_insn_info_linear_search(iseq, %"PRIuSIZE") != get_insn_info(iseq, %"PRIuSIZE")", i, i); + } } } #endif @@ -1875,10 +2217,10 @@ rb_iseq_line_no(const rb_iseq_t *iseq, size_t pos) const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pos); if (entry) { - return entry->line_no; + return entry->line_no; } else { - return 0; + return 0; } } @@ -1889,23 +2231,23 @@ rb_iseq_node_id(const rb_iseq_t *iseq, size_t pos) const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pos); if (entry) { - return entry->node_id; + return entry->node_id; } else { - return 0; + return 0; } } #endif -MJIT_FUNC_EXPORTED rb_event_flag_t +rb_event_flag_t rb_iseq_event_flags(const rb_iseq_t *iseq, size_t pos) { const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pos); if (entry) { - return entry->events; + return entry->events; } else { - return 0; + return 0; } } @@ -1931,19 +2273,19 @@ local_var_name(const rb_iseq_t *diseq, VALUE level, VALUE op) int idx; for (i = 0; i < level; i++) { - diseq = diseq->body->parent_iseq; + diseq = ISEQ_BODY(diseq)->parent_iseq; } - idx = diseq->body->local_table_size - (int)op - 1; - lid = diseq->body->local_table[idx]; + idx = ISEQ_BODY(diseq)->local_table_size - (int)op - 1; + lid = ISEQ_BODY(diseq)->local_table[idx]; name = rb_id2str(lid); if (!name) { - name = rb_str_new_cstr("?"); + name = rb_str_new_cstr("?"); } - else if (!rb_str_symname_p(name)) { - name = rb_str_inspect(name); + else if (!rb_is_local_id(lid)) { + name = rb_str_inspect(name); } else { - name = rb_str_dup(name); + name = rb_str_dup(name); } rb_str_catf(name, "@%d", idx); return name; @@ -1954,8 +2296,8 @@ VALUE rb_dump_literal(VALUE lit); VALUE rb_insn_operand_intern(const rb_iseq_t *iseq, - VALUE insn, int op_no, VALUE op, - int len, size_t pos, const VALUE *pnop, VALUE child) + VALUE insn, int op_no, VALUE op, + int len, size_t pos, const VALUE *pnop, VALUE child) { const char *types = insn_op_types(insn); char type = types[op_no]; @@ -1963,156 +2305,167 @@ rb_insn_operand_intern(const rb_iseq_t *iseq, switch (type) { case TS_OFFSET: /* LONG */ - ret = rb_sprintf("%"PRIdVALUE, (VALUE)(pos + len + op)); - break; + ret = rb_sprintf("%"PRIdVALUE, (VALUE)(pos + len + op)); + break; case TS_NUM: /* ULONG */ - if (insn == BIN(defined) && op_no == 0) { - enum defined_type deftype = (enum defined_type)op; - switch (deftype) { - case DEFINED_FUNC: - ret = rb_fstring_lit("func"); - break; - case DEFINED_REF: - ret = rb_fstring_lit("ref"); - break; - case DEFINED_CONST_FROM: - ret = rb_fstring_lit("constant-from"); - break; - default: - ret = rb_iseq_defined_string(deftype); - break; - } - if (ret) break; - } - else if (insn == BIN(checktype) && op_no == 0) { - const char *type_str = rb_type_str((enum ruby_value_type)op); - if (type_str) { - ret = rb_str_new_cstr(type_str); break; - } - } - ret = rb_sprintf("%"PRIuVALUE, op); - break; + if (insn == BIN(defined) && op_no == 0) { + enum defined_type deftype = (enum defined_type)op; + switch (deftype) { + case DEFINED_FUNC: + ret = rb_fstring_lit("func"); + break; + case DEFINED_REF: + ret = rb_fstring_lit("ref"); + break; + case DEFINED_CONST_FROM: + ret = rb_fstring_lit("constant-from"); + break; + default: + ret = rb_iseq_defined_string(deftype); + break; + } + if (ret) break; + } + else if (insn == BIN(checktype) && op_no == 0) { + const char *type_str = rb_type_str((enum ruby_value_type)op); + if (type_str) { + ret = rb_str_new_cstr(type_str); break; + } + } + ret = rb_sprintf("%"PRIuVALUE, op); + break; case TS_LINDEX:{ - int level; - if (types[op_no+1] == TS_NUM && pnop) { - ret = local_var_name(iseq, *pnop, op - VM_ENV_DATA_SIZE); - } - else if ((level = rb_insn_unified_local_var_level(insn)) >= 0) { - ret = local_var_name(iseq, (VALUE)level, op - VM_ENV_DATA_SIZE); - } - else { - ret = rb_inspect(INT2FIX(op)); - } - break; + int level; + if (types[op_no+1] == TS_NUM && pnop) { + ret = local_var_name(iseq, *pnop, op - VM_ENV_DATA_SIZE); + } + else if ((level = rb_insn_unified_local_var_level(insn)) >= 0) { + ret = local_var_name(iseq, (VALUE)level, op - VM_ENV_DATA_SIZE); + } + else { + ret = rb_inspect(INT2FIX(op)); + } + break; } case TS_ID: /* ID (symbol) */ - ret = rb_inspect(ID2SYM(op)); - break; + ret = rb_inspect(ID2SYM(op)); + break; case TS_VALUE: /* VALUE */ - op = obj_resurrect(op); - if (insn == BIN(defined) && op_no == 1 && FIXNUM_P(op)) { - /* should be DEFINED_REF */ - int type = NUM2INT(op); - if (type) { - if (type & 1) { - ret = rb_sprintf(":$%c", (type >> 1)); - } - else { - ret = rb_sprintf(":$%d", (type >> 1)); - } - break; - } - } - ret = rb_dump_literal(op); - if (CLASS_OF(op) == rb_cISeq) { - if (child) { - rb_ary_push(child, op); - } - } - break; + op = obj_resurrect(op); + if (insn == BIN(defined) && op_no == 1 && FIXNUM_P(op)) { + /* should be DEFINED_REF */ + int type = NUM2INT(op); + if (type) { + if (type & 1) { + ret = rb_sprintf(":$%c", (type >> 1)); + } + else { + ret = rb_sprintf(":$%d", (type >> 1)); + } + break; + } + } + ret = rb_dump_literal(op); + if (CLASS_OF(op) == rb_cISeq) { + if (child) { + rb_ary_push(child, op); + } + } + break; case TS_ISEQ: /* iseq */ - { - if (op) { - const rb_iseq_t *iseq = rb_iseq_check((rb_iseq_t *)op); - ret = iseq->body->location.label; - if (child) { - rb_ary_push(child, (VALUE)iseq); - } - } - else { - ret = rb_str_new2("nil"); - } - break; - } + { + if (op) { + const rb_iseq_t *iseq = rb_iseq_check((rb_iseq_t *)op); + ret = ISEQ_BODY(iseq)->location.label; + if (child) { + rb_ary_push(child, (VALUE)iseq); + } + } + else { + ret = rb_str_new2("nil"); + } + break; + } case TS_IC: + { + ret = rb_sprintf("<ic:%"PRIdPTRDIFF" ", (union iseq_inline_storage_entry *)op - ISEQ_BODY(iseq)->is_entries); + const ID *segments = ((IC)op)->segments; + rb_str_cat2(ret, rb_id2name(*segments++)); + while (*segments) { + rb_str_catf(ret, "::%s", rb_id2name(*segments++)); + } + rb_str_cat2(ret, ">"); + } + break; case TS_IVC: + case TS_ICVARC: case TS_ISE: - ret = rb_sprintf("<is:%"PRIdPTRDIFF">", (union iseq_inline_storage_entry *)op - iseq->body->is_entries); - break; + ret = rb_sprintf("<is:%"PRIdPTRDIFF">", (union iseq_inline_storage_entry *)op - ISEQ_BODY(iseq)->is_entries); + break; case TS_CALLDATA: - { + { struct rb_call_data *cd = (struct rb_call_data *)op; const struct rb_callinfo *ci = cd->ci; - VALUE ary = rb_ary_new(); + VALUE ary = rb_ary_new(); ID mid = vm_ci_mid(ci); if (mid) { - rb_ary_push(ary, rb_sprintf("mid:%"PRIsVALUE, rb_id2str(mid))); - } + rb_ary_push(ary, rb_sprintf("mid:%"PRIsVALUE, rb_id2str(mid))); + } - rb_ary_push(ary, rb_sprintf("argc:%d", vm_ci_argc(ci))); + rb_ary_push(ary, rb_sprintf("argc:%d", vm_ci_argc(ci))); if (vm_ci_flag(ci) & VM_CALL_KWARG) { const struct rb_callinfo_kwarg *kw_args = vm_ci_kwarg(ci); VALUE kw_ary = rb_ary_new_from_values(kw_args->keyword_len, kw_args->keywords); rb_ary_push(ary, rb_sprintf("kw:[%"PRIsVALUE"]", rb_ary_join(kw_ary, rb_str_new2(",")))); - } + } if (vm_ci_flag(ci)) { - VALUE flags = rb_ary_new(); + VALUE flags = rb_ary_new(); # define CALL_FLAG(n) if (vm_ci_flag(ci) & VM_CALL_##n) rb_ary_push(flags, rb_str_new2(#n)) - CALL_FLAG(ARGS_SPLAT); - CALL_FLAG(ARGS_BLOCKARG); - CALL_FLAG(FCALL); - CALL_FLAG(VCALL); - CALL_FLAG(ARGS_SIMPLE); - CALL_FLAG(BLOCKISEQ); - CALL_FLAG(TAILCALL); - CALL_FLAG(SUPER); - CALL_FLAG(ZSUPER); - CALL_FLAG(KWARG); - CALL_FLAG(KW_SPLAT); + CALL_FLAG(ARGS_SPLAT); + CALL_FLAG(ARGS_SPLAT_MUT); + CALL_FLAG(ARGS_BLOCKARG); + CALL_FLAG(FCALL); + CALL_FLAG(VCALL); + CALL_FLAG(ARGS_SIMPLE); + CALL_FLAG(TAILCALL); + CALL_FLAG(SUPER); + CALL_FLAG(ZSUPER); + CALL_FLAG(KWARG); + CALL_FLAG(KW_SPLAT); CALL_FLAG(KW_SPLAT_MUT); - CALL_FLAG(OPT_SEND); /* maybe not reachable */ - rb_ary_push(ary, rb_ary_join(flags, rb_str_new2("|"))); - } + CALL_FLAG(OPT_SEND); /* maybe not reachable */ + rb_ary_push(ary, rb_ary_join(flags, rb_str_new2("|"))); + } ret = rb_sprintf("<calldata!%"PRIsVALUE">", rb_ary_join(ary, rb_str_new2(", "))); } - break; + break; case TS_CDHASH: - ret = rb_str_new2("<cdhash>"); - break; + ret = rb_str_new2("<cdhash>"); + break; case TS_FUNCPTR: - { + { #ifdef HAVE_DLADDR - Dl_info info; - if (dladdr((void *)op, &info) && info.dli_sname) { - ret = rb_str_new_cstr(info.dli_sname); - break; - } + Dl_info info; + if (dladdr((void *)op, &info) && info.dli_sname) { + ret = rb_str_new_cstr(info.dli_sname); + break; + } #endif - ret = rb_str_new2("<funcptr>"); - } - break; + ret = rb_str_new2("<funcptr>"); + } + break; case TS_BUILTIN: { @@ -2123,7 +2476,7 @@ rb_insn_operand_intern(const rb_iseq_t *iseq, break; default: - rb_bug("unknown operand type: %c", type); + rb_bug("unknown operand type: %c", type); } return ret; } @@ -2143,7 +2496,7 @@ right_strip(VALUE str) */ int rb_iseq_disasm_insn(VALUE ret, const VALUE *code, size_t pos, - const rb_iseq_t *iseq, VALUE child) + const rb_iseq_t *iseq, VALUE child) { VALUE insn = code[pos]; int len = insn_len(insn); @@ -2154,60 +2507,61 @@ rb_iseq_disasm_insn(VALUE ret, const VALUE *code, size_t pos, insn_name_buff = insn_name(insn); if (1) { - extern const int rb_vm_max_insn_name_size; - rb_str_catf(str, "%04"PRIuSIZE" %-*s ", pos, rb_vm_max_insn_name_size, insn_name_buff); + extern const int rb_vm_max_insn_name_size; + rb_str_catf(str, "%04"PRIuSIZE" %-*s ", pos, rb_vm_max_insn_name_size, insn_name_buff); } else { - rb_str_catf(str, "%04"PRIuSIZE" %-28.*s ", pos, - (int)strcspn(insn_name_buff, "_"), insn_name_buff); + rb_str_catf(str, "%04"PRIuSIZE" %-28.*s ", pos, + (int)strcspn(insn_name_buff, "_"), insn_name_buff); } for (j = 0; types[j]; j++) { - VALUE opstr = rb_insn_operand_intern(iseq, insn, j, code[pos + j + 1], - len, pos, &code[pos + j + 2], - child); - rb_str_concat(str, opstr); + VALUE opstr = rb_insn_operand_intern(iseq, insn, j, code[pos + j + 1], + len, pos, &code[pos + j + 2], + child); + rb_str_concat(str, opstr); - if (types[j + 1]) { - rb_str_cat2(str, ", "); - } + if (types[j + 1]) { + rb_str_cat2(str, ", "); + } } { - unsigned int line_no = rb_iseq_line_no(iseq, pos); - unsigned int prev = pos == 0 ? 0 : rb_iseq_line_no(iseq, pos - 1); - if (line_no && line_no != prev) { - long slen = RSTRING_LEN(str); - slen = (slen > 70) ? 0 : (70 - slen); - str = rb_str_catf(str, "%*s(%4d)", (int)slen, "", line_no); - } + unsigned int line_no = rb_iseq_line_no(iseq, pos); + unsigned int prev = pos == 0 ? 0 : rb_iseq_line_no(iseq, pos - 1); + if (line_no && line_no != prev) { + long slen = RSTRING_LEN(str); + slen = (slen > 70) ? 0 : (70 - slen); + str = rb_str_catf(str, "%*s(%4d)", (int)slen, "", line_no); + } } { - rb_event_flag_t events = rb_iseq_event_flags(iseq, pos); - if (events) { - str = rb_str_catf(str, "[%s%s%s%s%s%s%s%s%s%s%s]", - events & RUBY_EVENT_LINE ? "Li" : "", - events & RUBY_EVENT_CLASS ? "Cl" : "", - events & RUBY_EVENT_END ? "En" : "", - events & RUBY_EVENT_CALL ? "Ca" : "", - events & RUBY_EVENT_RETURN ? "Re" : "", - events & RUBY_EVENT_C_CALL ? "Cc" : "", - events & RUBY_EVENT_C_RETURN ? "Cr" : "", - events & RUBY_EVENT_B_CALL ? "Bc" : "", + rb_event_flag_t events = rb_iseq_event_flags(iseq, pos); + if (events) { + str = rb_str_catf(str, "[%s%s%s%s%s%s%s%s%s%s%s%s]", + events & RUBY_EVENT_LINE ? "Li" : "", + events & RUBY_EVENT_CLASS ? "Cl" : "", + events & RUBY_EVENT_END ? "En" : "", + events & RUBY_EVENT_CALL ? "Ca" : "", + events & RUBY_EVENT_RETURN ? "Re" : "", + events & RUBY_EVENT_C_CALL ? "Cc" : "", + events & RUBY_EVENT_C_RETURN ? "Cr" : "", + events & RUBY_EVENT_B_CALL ? "Bc" : "", events & RUBY_EVENT_B_RETURN ? "Br" : "", + events & RUBY_EVENT_RESCUE ? "Rs" : "", events & RUBY_EVENT_COVERAGE_LINE ? "Cli" : "", events & RUBY_EVENT_COVERAGE_BRANCH ? "Cbr" : ""); - } + } } right_strip(str); if (ret) { - rb_str_cat2(str, "\n"); - rb_str_concat(ret, str); + rb_str_cat2(str, "\n"); + rb_str_concat(ret, str); } else { - printf("%.*s\n", (int)RSTRING_LEN(str), RSTRING_PTR(str)); + printf("%.*s\n", (int)RSTRING_LEN(str), RSTRING_PTR(str)); } return len; } @@ -2217,39 +2571,39 @@ catch_type(int type) { switch (type) { case CATCH_TYPE_RESCUE: - return "rescue"; + return "rescue"; case CATCH_TYPE_ENSURE: - return "ensure"; + return "ensure"; case CATCH_TYPE_RETRY: - return "retry"; + return "retry"; case CATCH_TYPE_BREAK: - return "break"; + return "break"; case CATCH_TYPE_REDO: - return "redo"; + return "redo"; case CATCH_TYPE_NEXT: - return "next"; + return "next"; default: - rb_bug("unknown catch type: %d", type); - return 0; + rb_bug("unknown catch type: %d", type); + return 0; } } static VALUE iseq_inspect(const rb_iseq_t *iseq) { - const struct rb_iseq_constant_body *const body = iseq->body; + const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); if (!body->location.label) { - return rb_sprintf("#<ISeq: uninitialized>"); + return rb_sprintf("#<ISeq: uninitialized>"); } else { - const rb_code_location_t *loc = &body->location.code_location; - return rb_sprintf("#<ISeq:%"PRIsVALUE"@%"PRIsVALUE":%d (%d,%d)-(%d,%d)>", - body->location.label, rb_iseq_path(iseq), - loc->beg_pos.lineno, - loc->beg_pos.lineno, - loc->beg_pos.column, - loc->end_pos.lineno, - loc->end_pos.column); + const rb_code_location_t *loc = &body->location.code_location; + return rb_sprintf("#<ISeq:%"PRIsVALUE"@%"PRIsVALUE":%d (%d,%d)-(%d,%d)>", + body->location.label, rb_iseq_path(iseq), + loc->beg_pos.lineno, + loc->beg_pos.lineno, + loc->beg_pos.column, + loc->end_pos.lineno, + loc->end_pos.column); } } @@ -2262,10 +2616,10 @@ static const rb_data_type_t tmp_set = { static VALUE rb_iseq_disasm_recursive(const rb_iseq_t *iseq, VALUE indent) { - const struct rb_iseq_constant_body *const body = iseq->body; + const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); VALUE *code; VALUE str = rb_str_new(0, 0); - VALUE child = rb_ary_tmp_new(3); + VALUE child = rb_ary_hidden_new(3); unsigned int size; unsigned int i; long l; @@ -2285,113 +2639,121 @@ rb_iseq_disasm_recursive(const rb_iseq_t *iseq, VALUE indent) rb_str_cat2(str, "== disasm: "); rb_str_append(str, iseq_inspect(iseq)); - rb_str_catf(str, " (catch: %s)", body->catch_except_p ? "TRUE" : "FALSE"); if ((l = RSTRING_LEN(str) - indent_len) < header_minlen) { - rb_str_modify_expand(str, header_minlen - l); - memset(RSTRING_END(str), '=', header_minlen - l); + rb_str_modify_expand(str, header_minlen - l); + memset(RSTRING_END(str), '=', header_minlen - l); + } + if (iseq->body->builtin_attrs) { +#define disasm_builtin_attr(str, iseq, attr) \ + if (iseq->body->builtin_attrs & BUILTIN_ATTR_ ## attr) { \ + rb_str_cat2(str, " " #attr); \ + } + disasm_builtin_attr(str, iseq, LEAF); + disasm_builtin_attr(str, iseq, SINGLE_NOARG_LEAF); + disasm_builtin_attr(str, iseq, INLINE_BLOCK); } rb_str_cat2(str, "\n"); /* show catch table information */ if (body->catch_table) { - rb_str_cat(str, indent_str, indent_len); - rb_str_cat2(str, "== catch table\n"); + rb_str_cat(str, indent_str, indent_len); + rb_str_cat2(str, "== catch table\n"); } if (body->catch_table) { - rb_str_cat_cstr(indent, "| "); - indent_str = RSTRING_PTR(indent); - for (i = 0; i < body->catch_table->size; i++) { - const struct iseq_catch_table_entry *entry = - UNALIGNED_MEMBER_PTR(body->catch_table, entries[i]); - rb_str_cat(str, indent_str, indent_len); - rb_str_catf(str, - "| catch type: %-6s st: %04d ed: %04d sp: %04d cont: %04d\n", - catch_type((int)entry->type), (int)entry->start, - (int)entry->end, (int)entry->sp, (int)entry->cont); - if (entry->iseq && !(done_iseq && st_is_member(done_iseq, (st_data_t)entry->iseq))) { - rb_str_concat(str, rb_iseq_disasm_recursive(rb_iseq_check(entry->iseq), indent)); - if (!done_iseq) { + rb_str_cat_cstr(indent, "| "); + indent_str = RSTRING_PTR(indent); + for (i = 0; i < body->catch_table->size; i++) { + const struct iseq_catch_table_entry *entry = + UNALIGNED_MEMBER_PTR(body->catch_table, entries[i]); + rb_str_cat(str, indent_str, indent_len); + rb_str_catf(str, + "| catch type: %-6s st: %04d ed: %04d sp: %04d cont: %04d\n", + catch_type((int)entry->type), (int)entry->start, + (int)entry->end, (int)entry->sp, (int)entry->cont); + if (entry->iseq && !(done_iseq && st_is_member(done_iseq, (st_data_t)entry->iseq))) { + rb_str_concat(str, rb_iseq_disasm_recursive(rb_iseq_check(entry->iseq), indent)); + if (!done_iseq) { done_iseq = st_init_numtable(); done_iseq_wrapper = TypedData_Wrap_Struct(0, &tmp_set, done_iseq); } - st_insert(done_iseq, (st_data_t)entry->iseq, (st_data_t)0); - indent_str = RSTRING_PTR(indent); - } - } - rb_str_resize(indent, indent_len); - indent_str = RSTRING_PTR(indent); + st_insert(done_iseq, (st_data_t)entry->iseq, (st_data_t)0); + indent_str = RSTRING_PTR(indent); + } + } + rb_str_resize(indent, indent_len); + indent_str = RSTRING_PTR(indent); } if (body->catch_table) { - rb_str_cat(str, indent_str, indent_len); - rb_str_cat2(str, "|-------------------------------------" - "-----------------------------------\n"); + rb_str_cat(str, indent_str, indent_len); + rb_str_cat2(str, "|-------------------------------------" + "-----------------------------------\n"); } /* show local table information */ if (body->local_table) { - const struct rb_iseq_param_keyword *const keyword = body->param.keyword; - rb_str_cat(str, indent_str, indent_len); - rb_str_catf(str, - "local table (size: %d, argc: %d " - "[opts: %d, rest: %d, post: %d, block: %d, kw: %d@%d, kwrest: %d])\n", - body->local_table_size, - body->param.lead_num, - body->param.opt_num, - body->param.flags.has_rest ? body->param.rest_start : -1, - body->param.post_num, - body->param.flags.has_block ? body->param.block_start : -1, - body->param.flags.has_kw ? keyword->num : -1, - body->param.flags.has_kw ? keyword->required_num : -1, - body->param.flags.has_kwrest ? keyword->rest_start : -1); - - for (i = body->local_table_size; i > 0;) { - int li = body->local_table_size - --i - 1; - long width; - VALUE name = local_var_name(iseq, 0, i); + const struct rb_iseq_param_keyword *const keyword = body->param.keyword; + rb_str_cat(str, indent_str, indent_len); + rb_str_catf(str, + "local table (size: %d, argc: %d " + "[opts: %d, rest: %d, post: %d, block: %d, kw: %d@%d, kwrest: %d])\n", + body->local_table_size, + body->param.lead_num, + body->param.opt_num, + body->param.flags.has_rest ? body->param.rest_start : -1, + body->param.post_num, + body->param.flags.has_block ? body->param.block_start : -1, + body->param.flags.has_kw ? keyword->num : -1, + body->param.flags.has_kw ? keyword->required_num : -1, + body->param.flags.has_kwrest ? keyword->rest_start : -1); + + for (i = body->local_table_size; i > 0;) { + int li = body->local_table_size - --i - 1; + long width; + VALUE name = local_var_name(iseq, 0, i); char argi[0x100]; char opti[0x100]; opti[0] = '\0'; - if (body->param.flags.has_opt) { - int argc = body->param.lead_num; - int opts = body->param.opt_num; - if (li >= argc && li < argc + opts) { - snprintf(opti, sizeof(opti), "Opt=%"PRIdVALUE, - body->param.opt_table[li - argc]); - } - } - - snprintf(argi, sizeof(argi), "%s%s%s%s%s%s", /* arg, opts, rest, post, kwrest, block */ - body->param.lead_num > li ? "Arg" : "", - opti, - (body->param.flags.has_rest && body->param.rest_start == li) ? "Rest" : "", - (body->param.flags.has_post && body->param.post_start <= li && li < body->param.post_start + body->param.post_num) ? "Post" : "", - (body->param.flags.has_kwrest && keyword->rest_start == li) ? "Kwrest" : "", - (body->param.flags.has_block && body->param.block_start == li) ? "Block" : ""); - - rb_str_cat(str, indent_str, indent_len); - rb_str_catf(str, "[%2d] ", i + 1); - width = RSTRING_LEN(str) + 11; - rb_str_append(str, name); - if (*argi) rb_str_catf(str, "<%s>", argi); - if ((width -= RSTRING_LEN(str)) > 0) rb_str_catf(str, "%*s", (int)width, ""); - } - rb_str_cat_cstr(right_strip(str), "\n"); + if (body->param.flags.has_opt) { + int argc = body->param.lead_num; + int opts = body->param.opt_num; + if (li >= argc && li < argc + opts) { + snprintf(opti, sizeof(opti), "Opt=%"PRIdVALUE, + body->param.opt_table[li - argc]); + } + } + + snprintf(argi, sizeof(argi), "%s%s%s%s%s%s", /* arg, opts, rest, post, kwrest, block */ + body->param.lead_num > li ? "Arg" : "", + opti, + (body->param.flags.has_rest && body->param.rest_start == li) ? "Rest" : "", + (body->param.flags.has_post && body->param.post_start <= li && li < body->param.post_start + body->param.post_num) ? "Post" : "", + (body->param.flags.has_kwrest && keyword->rest_start == li) ? "Kwrest" : "", + (body->param.flags.has_block && body->param.block_start == li) ? "Block" : ""); + + rb_str_cat(str, indent_str, indent_len); + rb_str_catf(str, "[%2d] ", i + 1); + width = RSTRING_LEN(str) + 11; + rb_str_append(str, name); + if (*argi) rb_str_catf(str, "<%s>", argi); + if ((width -= RSTRING_LEN(str)) > 0) rb_str_catf(str, "%*s", (int)width, ""); + } + rb_str_cat_cstr(right_strip(str), "\n"); } /* show each line */ code = rb_iseq_original_iseq(iseq); for (n = 0; n < size;) { - rb_str_cat(str, indent_str, indent_len); - n += rb_iseq_disasm_insn(str, code, n, iseq, child); + rb_str_cat(str, indent_str, indent_len); + n += rb_iseq_disasm_insn(str, code, n, iseq, child); } for (l = 0; l < RARRAY_LEN(child); l++) { - VALUE isv = rb_ary_entry(child, l); - if (done_iseq && st_is_member(done_iseq, (st_data_t)isv)) continue; - rb_str_cat_cstr(str, "\n"); - rb_str_concat(str, rb_iseq_disasm_recursive(rb_iseq_check((rb_iseq_t *)isv), indent)); - indent_str = RSTRING_PTR(indent); + VALUE isv = rb_ary_entry(child, l); + if (done_iseq && st_is_member(done_iseq, (st_data_t)isv)) continue; + rb_str_cat_cstr(str, "\n"); + rb_str_concat(str, rb_iseq_disasm_recursive(rb_iseq_check((rb_iseq_t *)isv), indent)); + indent_str = RSTRING_PTR(indent); } RB_GC_GUARD(done_iseq_wrapper); @@ -2407,6 +2769,34 @@ rb_iseq_disasm(const rb_iseq_t *iseq) } /* + * Estimates the number of instance variables that will be set on + * a given `class` with the initialize method defined in + * `initialize_iseq` + */ +attr_index_t +rb_estimate_iv_count(VALUE klass, const rb_iseq_t * initialize_iseq) +{ + struct rb_id_table * iv_names = rb_id_table_create(0); + + for (unsigned int i = 0; i < ISEQ_BODY(initialize_iseq)->ivc_size; i++) { + IVC cache = (IVC)&ISEQ_BODY(initialize_iseq)->is_entries[i]; + + if (cache->iv_set_name) { + rb_id_table_insert(iv_names, cache->iv_set_name, Qtrue); + } + } + + attr_index_t count = (attr_index_t)rb_id_table_size(iv_names); + + VALUE superclass = rb_class_superclass(klass); + count += RCLASS_EXT(superclass)->max_iv_count; + + rb_id_table_free(iv_names); + + return count; +} + +/* * call-seq: * iseq.disasm -> str * iseq.disassemble -> str @@ -2435,7 +2825,7 @@ iseq_iterate_children(const rb_iseq_t *iseq, void (*iter_func)(const rb_iseq_t * { unsigned int i; VALUE *code = rb_iseq_original_iseq(iseq); - const struct rb_iseq_constant_body *const body = iseq->body; + const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); const rb_iseq_t *child; VALUE all_children = rb_obj_hide(rb_ident_hash_new()); @@ -2513,6 +2903,7 @@ push_event_info(const rb_iseq_t *iseq, rb_event_flag_t events, int line, VALUE a C(RUBY_EVENT_END, "end", INT2FIX(line)); C(RUBY_EVENT_RETURN, "return", INT2FIX(line)); C(RUBY_EVENT_B_RETURN, "b_return", INT2FIX(line)); + C(RUBY_EVENT_RESCUE, "rescue", INT2FIX(line)); #undef C } @@ -2527,15 +2918,15 @@ static VALUE iseqw_trace_points(VALUE self) { const rb_iseq_t *iseq = iseqw_check(self); - const struct rb_iseq_constant_body *const body = iseq->body; + const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); unsigned int i; VALUE ary = rb_ary_new(); for (i=0; i<body->insns_info.size; i++) { - const struct iseq_insn_info_entry *entry = &body->insns_info.body[i]; - if (entry->events) { - push_event_info(iseq, entry->events, entry->line_no, ary); - } + const struct iseq_insn_info_entry *entry = &body->insns_info.body[i]; + if (entry->events) { + push_event_info(iseq, entry->events, entry->line_no, ary); + } } return ary; } @@ -2656,23 +3047,6 @@ iseqw_s_disasm(VALUE klass, VALUE body) return NIL_P(iseqw) ? Qnil : rb_iseq_disasm(iseqw_check(iseqw)); } -const char * -ruby_node_name(int node) -{ - switch (node) { -#include "node_name.inc" - default: - rb_bug("unknown node: %d", node); - return 0; - } -} - -#define DECL_SYMBOL(name) \ - static ID sym_##name - -#define INIT_SYMBOL(name) \ - sym_##name = rb_intern(#name) - static VALUE register_label(struct st_table *table, unsigned long idx) { @@ -2693,7 +3067,7 @@ exception_type2symbol(VALUE type) case CATCH_TYPE_REDO: CONST_ID(id, "redo"); break; case CATCH_TYPE_NEXT: CONST_ID(id, "next"); break; default: - rb_bug("unknown exception type: %d", (int)type); + rb_bug("unknown exception type: %d", (int)type); } return ID2SYM(id); } @@ -2712,12 +3086,58 @@ static const rb_data_type_t label_wrapper = { 0, 0, RUBY_TYPED_FREE_IMMEDIATELY }; +#define DECL_ID(name) \ + static ID id_##name + +#define INIT_ID(name) \ + id_##name = rb_intern(#name) + +static VALUE +iseq_type_id(enum rb_iseq_type type) +{ + DECL_ID(top); + DECL_ID(method); + DECL_ID(block); + DECL_ID(class); + DECL_ID(rescue); + DECL_ID(ensure); + DECL_ID(eval); + DECL_ID(main); + DECL_ID(plain); + + if (id_top == 0) { + INIT_ID(top); + INIT_ID(method); + INIT_ID(block); + INIT_ID(class); + INIT_ID(rescue); + INIT_ID(ensure); + INIT_ID(eval); + INIT_ID(main); + INIT_ID(plain); + } + + switch (type) { + case ISEQ_TYPE_TOP: return id_top; + case ISEQ_TYPE_METHOD: return id_method; + case ISEQ_TYPE_BLOCK: return id_block; + case ISEQ_TYPE_CLASS: return id_class; + case ISEQ_TYPE_RESCUE: return id_rescue; + case ISEQ_TYPE_ENSURE: return id_ensure; + case ISEQ_TYPE_EVAL: return id_eval; + case ISEQ_TYPE_MAIN: return id_main; + case ISEQ_TYPE_PLAIN: return id_plain; + }; + + rb_bug("unsupported iseq type: %d", (int)type); +} + static VALUE iseq_data_to_ary(const rb_iseq_t *iseq) { unsigned int i; long l; - const struct rb_iseq_constant_body *const iseq_body = iseq->body; + const struct rb_iseq_constant_body *const iseq_body = ISEQ_BODY(iseq); const struct iseq_insn_info_entry *prev_insn_info; unsigned int pos; int last_line = 0; @@ -2736,209 +3156,192 @@ iseq_data_to_ary(const rb_iseq_t *iseq) struct st_table *labels_table = st_init_numtable(); VALUE labels_wrapper = TypedData_Wrap_Struct(0, &label_wrapper, labels_table); - DECL_SYMBOL(top); - DECL_SYMBOL(method); - DECL_SYMBOL(block); - DECL_SYMBOL(class); - DECL_SYMBOL(rescue); - DECL_SYMBOL(ensure); - DECL_SYMBOL(eval); - DECL_SYMBOL(main); - DECL_SYMBOL(plain); - - if (sym_top == 0) { - int i; - for (i=0; i<numberof(insn_syms); i++) { + if (insn_syms[0] == 0) { + int i; + for (i=0; i<numberof(insn_syms); i++) { insn_syms[i] = rb_intern(insn_name(i)); - } - INIT_SYMBOL(top); - INIT_SYMBOL(method); - INIT_SYMBOL(block); - INIT_SYMBOL(class); - INIT_SYMBOL(rescue); - INIT_SYMBOL(ensure); - INIT_SYMBOL(eval); - INIT_SYMBOL(main); - INIT_SYMBOL(plain); + } } /* type */ - switch (iseq_body->type) { - case ISEQ_TYPE_TOP: type = sym_top; break; - case ISEQ_TYPE_METHOD: type = sym_method; break; - case ISEQ_TYPE_BLOCK: type = sym_block; break; - case ISEQ_TYPE_CLASS: type = sym_class; break; - case ISEQ_TYPE_RESCUE: type = sym_rescue; break; - case ISEQ_TYPE_ENSURE: type = sym_ensure; break; - case ISEQ_TYPE_EVAL: type = sym_eval; break; - case ISEQ_TYPE_MAIN: type = sym_main; break; - case ISEQ_TYPE_PLAIN: type = sym_plain; break; - default: rb_bug("unsupported iseq type: %d", (int)iseq_body->type); - }; + type = iseq_type_id(iseq_body->type); /* locals */ for (i=0; i<iseq_body->local_table_size; i++) { - ID lid = iseq_body->local_table[i]; - if (lid) { - if (rb_id2str(lid)) { - rb_ary_push(locals, ID2SYM(lid)); - } - else { /* hidden variable from id_internal() */ - rb_ary_push(locals, ULONG2NUM(iseq_body->local_table_size-i+1)); - } - } - else { - rb_ary_push(locals, ID2SYM(rb_intern("#arg_rest"))); - } + ID lid = iseq_body->local_table[i]; + if (lid) { + if (rb_id2str(lid)) { + rb_ary_push(locals, ID2SYM(lid)); + } + else { /* hidden variable from id_internal() */ + rb_ary_push(locals, ULONG2NUM(iseq_body->local_table_size-i+1)); + } + } + else { + rb_ary_push(locals, ID2SYM(rb_intern("#arg_rest"))); + } } /* params */ { - const struct rb_iseq_param_keyword *const keyword = iseq_body->param.keyword; - int j; - - if (iseq_body->param.flags.has_opt) { - int len = iseq_body->param.opt_num + 1; - VALUE arg_opt_labels = rb_ary_new2(len); - - for (j = 0; j < len; j++) { - VALUE l = register_label(labels_table, iseq_body->param.opt_table[j]); - rb_ary_push(arg_opt_labels, l); - } - rb_hash_aset(params, ID2SYM(rb_intern("opt")), arg_opt_labels); - } - - /* commit */ - if (iseq_body->param.flags.has_lead) rb_hash_aset(params, ID2SYM(rb_intern("lead_num")), INT2FIX(iseq_body->param.lead_num)); - if (iseq_body->param.flags.has_post) rb_hash_aset(params, ID2SYM(rb_intern("post_num")), INT2FIX(iseq_body->param.post_num)); - if (iseq_body->param.flags.has_post) rb_hash_aset(params, ID2SYM(rb_intern("post_start")), INT2FIX(iseq_body->param.post_start)); - if (iseq_body->param.flags.has_rest) rb_hash_aset(params, ID2SYM(rb_intern("rest_start")), INT2FIX(iseq_body->param.rest_start)); - if (iseq_body->param.flags.has_block) rb_hash_aset(params, ID2SYM(rb_intern("block_start")), INT2FIX(iseq_body->param.block_start)); - if (iseq_body->param.flags.has_kw) { - VALUE keywords = rb_ary_new(); - int i, j; - for (i=0; i<keyword->required_num; i++) { - rb_ary_push(keywords, ID2SYM(keyword->table[i])); - } - for (j=0; i<keyword->num; i++, j++) { - VALUE key = rb_ary_new_from_args(1, ID2SYM(keyword->table[i])); - if (keyword->default_values[j] != Qundef) { - rb_ary_push(key, keyword->default_values[j]); - } - rb_ary_push(keywords, key); - } - - rb_hash_aset(params, ID2SYM(rb_intern("kwbits")), - INT2FIX(keyword->bits_start)); - rb_hash_aset(params, ID2SYM(rb_intern("keyword")), keywords); - } - if (iseq_body->param.flags.has_kwrest) rb_hash_aset(params, ID2SYM(rb_intern("kwrest")), INT2FIX(keyword->rest_start)); - if (iseq_body->param.flags.ambiguous_param0) rb_hash_aset(params, ID2SYM(rb_intern("ambiguous_param0")), Qtrue); + const struct rb_iseq_param_keyword *const keyword = iseq_body->param.keyword; + int j; + + if (iseq_body->param.flags.has_opt) { + int len = iseq_body->param.opt_num + 1; + VALUE arg_opt_labels = rb_ary_new2(len); + + for (j = 0; j < len; j++) { + VALUE l = register_label(labels_table, iseq_body->param.opt_table[j]); + rb_ary_push(arg_opt_labels, l); + } + rb_hash_aset(params, ID2SYM(rb_intern("opt")), arg_opt_labels); + } + + /* commit */ + if (iseq_body->param.flags.has_lead) rb_hash_aset(params, ID2SYM(rb_intern("lead_num")), INT2FIX(iseq_body->param.lead_num)); + if (iseq_body->param.flags.has_post) rb_hash_aset(params, ID2SYM(rb_intern("post_num")), INT2FIX(iseq_body->param.post_num)); + if (iseq_body->param.flags.has_post) rb_hash_aset(params, ID2SYM(rb_intern("post_start")), INT2FIX(iseq_body->param.post_start)); + if (iseq_body->param.flags.has_rest) rb_hash_aset(params, ID2SYM(rb_intern("rest_start")), INT2FIX(iseq_body->param.rest_start)); + if (iseq_body->param.flags.has_block) rb_hash_aset(params, ID2SYM(rb_intern("block_start")), INT2FIX(iseq_body->param.block_start)); + if (iseq_body->param.flags.has_kw) { + VALUE keywords = rb_ary_new(); + int i, j; + for (i=0; i<keyword->required_num; i++) { + rb_ary_push(keywords, ID2SYM(keyword->table[i])); + } + for (j=0; i<keyword->num; i++, j++) { + VALUE key = rb_ary_new_from_args(1, ID2SYM(keyword->table[i])); + if (!UNDEF_P(keyword->default_values[j])) { + rb_ary_push(key, keyword->default_values[j]); + } + rb_ary_push(keywords, key); + } + + rb_hash_aset(params, ID2SYM(rb_intern("kwbits")), + INT2FIX(keyword->bits_start)); + rb_hash_aset(params, ID2SYM(rb_intern("keyword")), keywords); + } + if (iseq_body->param.flags.has_kwrest) rb_hash_aset(params, ID2SYM(rb_intern("kwrest")), INT2FIX(keyword->rest_start)); + if (iseq_body->param.flags.ambiguous_param0) rb_hash_aset(params, ID2SYM(rb_intern("ambiguous_param0")), Qtrue); + if (iseq_body->param.flags.use_block) rb_hash_aset(params, ID2SYM(rb_intern("use_block")), Qtrue); } /* body */ iseq_original = rb_iseq_original_iseq((rb_iseq_t *)iseq); for (seq = iseq_original; seq < iseq_original + iseq_body->iseq_size; ) { - VALUE insn = *seq++; - int j, len = insn_len(insn); - VALUE *nseq = seq + len - 1; - VALUE ary = rb_ary_new2(len); + VALUE insn = *seq++; + int j, len = insn_len(insn); + VALUE *nseq = seq + len - 1; + VALUE ary = rb_ary_new2(len); rb_ary_push(ary, ID2SYM(insn_syms[insn%numberof(insn_syms)])); - for (j=0; j<len-1; j++, seq++) { - switch (insn_op_type(insn, j)) { - case TS_OFFSET: { - unsigned long idx = nseq - iseq_original + *seq; - rb_ary_push(ary, register_label(labels_table, idx)); - break; - } - case TS_LINDEX: - case TS_NUM: - rb_ary_push(ary, INT2FIX(*seq)); - break; - case TS_VALUE: - rb_ary_push(ary, obj_resurrect(*seq)); - break; - case TS_ISEQ: - { - const rb_iseq_t *iseq = (rb_iseq_t *)*seq; - if (iseq) { - VALUE val = iseq_data_to_ary(rb_iseq_check(iseq)); - rb_ary_push(ary, val); - } - else { - rb_ary_push(ary, Qnil); - } - } - break; - case TS_IC: + for (j=0; j<len-1; j++, seq++) { + enum ruby_insn_type_chars op_type = insn_op_type(insn, j); + + switch (op_type) { + case TS_OFFSET: { + unsigned long idx = nseq - iseq_original + *seq; + rb_ary_push(ary, register_label(labels_table, idx)); + break; + } + case TS_LINDEX: + case TS_NUM: + rb_ary_push(ary, INT2FIX(*seq)); + break; + case TS_VALUE: + rb_ary_push(ary, obj_resurrect(*seq)); + break; + case TS_ISEQ: + { + const rb_iseq_t *iseq = (rb_iseq_t *)*seq; + if (iseq) { + VALUE val = iseq_data_to_ary(rb_iseq_check(iseq)); + rb_ary_push(ary, val); + } + else { + rb_ary_push(ary, Qnil); + } + } + break; + case TS_IC: + { + VALUE list = rb_ary_new(); + const ID *ids = ((IC)*seq)->segments; + while (*ids) { + rb_ary_push(list, ID2SYM(*ids++)); + } + rb_ary_push(ary, list); + } + break; case TS_IVC: - case TS_ISE: - { - union iseq_inline_storage_entry *is = (union iseq_inline_storage_entry *)*seq; - rb_ary_push(ary, INT2FIX(is - iseq_body->is_entries)); - } - break; + case TS_ICVARC: + case TS_ISE: + { + union iseq_inline_storage_entry *is = (union iseq_inline_storage_entry *)*seq; + rb_ary_push(ary, INT2FIX(is - ISEQ_IS_ENTRY_START(ISEQ_BODY(iseq), op_type))); + } + break; case TS_CALLDATA: - { + { struct rb_call_data *cd = (struct rb_call_data *)*seq; const struct rb_callinfo *ci = cd->ci; - VALUE e = rb_hash_new(); + VALUE e = rb_hash_new(); int argc = vm_ci_argc(ci); ID mid = vm_ci_mid(ci); - rb_hash_aset(e, ID2SYM(rb_intern("mid")), mid ? ID2SYM(mid) : Qnil); - rb_hash_aset(e, ID2SYM(rb_intern("flag")), UINT2NUM(vm_ci_flag(ci))); + rb_hash_aset(e, ID2SYM(rb_intern("mid")), mid ? ID2SYM(mid) : Qnil); + rb_hash_aset(e, ID2SYM(rb_intern("flag")), UINT2NUM(vm_ci_flag(ci))); if (vm_ci_flag(ci) & VM_CALL_KWARG) { const struct rb_callinfo_kwarg *kwarg = vm_ci_kwarg(ci); int i; - VALUE kw = rb_ary_new2((long)kwarg->keyword_len); + VALUE kw = rb_ary_new2((long)kwarg->keyword_len); - argc -= kwarg->keyword_len; + argc -= kwarg->keyword_len; for (i = 0; i < kwarg->keyword_len; i++) { - rb_ary_push(kw, kwarg->keywords[i]); - } - rb_hash_aset(e, ID2SYM(rb_intern("kw_arg")), kw); - } - - rb_hash_aset(e, ID2SYM(rb_intern("orig_argc")), - INT2FIX(argc)); - rb_ary_push(ary, e); - } - break; - case TS_ID: - rb_ary_push(ary, ID2SYM(*seq)); - break; - case TS_CDHASH: - { - VALUE hash = *seq; - VALUE val = rb_ary_new(); - int i; - - rb_hash_foreach(hash, cdhash_each, val); - - for (i=0; i<RARRAY_LEN(val); i+=2) { - VALUE pos = FIX2INT(rb_ary_entry(val, i+1)); - unsigned long idx = nseq - iseq_original + pos; - - rb_ary_store(val, i+1, - register_label(labels_table, idx)); - } - rb_ary_push(ary, val); - } - break; - case TS_FUNCPTR: - { + rb_ary_push(kw, kwarg->keywords[i]); + } + rb_hash_aset(e, ID2SYM(rb_intern("kw_arg")), kw); + } + + rb_hash_aset(e, ID2SYM(rb_intern("orig_argc")), + INT2FIX(argc)); + rb_ary_push(ary, e); + } + break; + case TS_ID: + rb_ary_push(ary, ID2SYM(*seq)); + break; + case TS_CDHASH: + { + VALUE hash = *seq; + VALUE val = rb_ary_new(); + int i; + + rb_hash_foreach(hash, cdhash_each, val); + + for (i=0; i<RARRAY_LEN(val); i+=2) { + VALUE pos = FIX2INT(rb_ary_entry(val, i+1)); + unsigned long idx = nseq - iseq_original + pos; + + rb_ary_store(val, i+1, + register_label(labels_table, idx)); + } + rb_ary_push(ary, val); + } + break; + case TS_FUNCPTR: + { #if SIZEOF_VALUE <= SIZEOF_LONG - VALUE val = LONG2NUM((SIGNED_VALUE)*seq); + VALUE val = LONG2NUM((SIGNED_VALUE)*seq); #else - VALUE val = LL2NUM((SIGNED_VALUE)*seq); + VALUE val = LL2NUM((SIGNED_VALUE)*seq); #endif - rb_ary_push(ary, val); - } - break; + rb_ary_push(ary, val); + } + break; case TS_BUILTIN: { VALUE val = rb_hash_new(); @@ -2954,32 +3357,32 @@ iseq_data_to_ary(const rb_iseq_t *iseq) rb_ary_push(ary, val); } break; - default: - rb_bug("unknown operand: %c", insn_op_type(insn, j)); - } - } - rb_ary_push(body, ary); + default: + rb_bug("unknown operand: %c", insn_op_type(insn, j)); + } + } + rb_ary_push(body, ary); } nbody = body; /* exception */ if (iseq_body->catch_table) for (i=0; i<iseq_body->catch_table->size; i++) { - VALUE ary = rb_ary_new(); - const struct iseq_catch_table_entry *entry = - UNALIGNED_MEMBER_PTR(iseq_body->catch_table, entries[i]); - rb_ary_push(ary, exception_type2symbol(entry->type)); - if (entry->iseq) { - rb_ary_push(ary, iseq_data_to_ary(rb_iseq_check(entry->iseq))); - } - else { - rb_ary_push(ary, Qnil); - } - rb_ary_push(ary, register_label(labels_table, entry->start)); - rb_ary_push(ary, register_label(labels_table, entry->end)); - rb_ary_push(ary, register_label(labels_table, entry->cont)); - rb_ary_push(ary, UINT2NUM(entry->sp)); - rb_ary_push(exception, ary); + VALUE ary = rb_ary_new(); + const struct iseq_catch_table_entry *entry = + UNALIGNED_MEMBER_PTR(iseq_body->catch_table, entries[i]); + rb_ary_push(ary, exception_type2symbol(entry->type)); + if (entry->iseq) { + rb_ary_push(ary, iseq_data_to_ary(rb_iseq_check(entry->iseq))); + } + else { + rb_ary_push(ary, Qnil); + } + rb_ary_push(ary, register_label(labels_table, entry->start)); + rb_ary_push(ary, register_label(labels_table, entry->end)); + rb_ary_push(ary, register_label(labels_table, entry->cont)); + rb_ary_push(ary, UINT2NUM(entry->sp)); + rb_ary_push(exception, ary); } /* make body with labels and insert line number */ @@ -2990,41 +3393,42 @@ iseq_data_to_ary(const rb_iseq_t *iseq) #endif for (l=0, pos=0; l<RARRAY_LEN(nbody); l++) { - const struct iseq_insn_info_entry *info; - VALUE ary = RARRAY_AREF(nbody, l); - st_data_t label; + const struct iseq_insn_info_entry *info; + VALUE ary = RARRAY_AREF(nbody, l); + st_data_t label; - if (st_lookup(labels_table, pos, &label)) { - rb_ary_push(body, (VALUE)label); - } + if (st_lookup(labels_table, pos, &label)) { + rb_ary_push(body, (VALUE)label); + } - info = get_insn_info(iseq, pos); + info = get_insn_info(iseq, pos); #ifdef USE_ISEQ_NODE_ID rb_ary_push(node_ids, INT2FIX(info->node_id)); #endif - if (prev_insn_info != info) { - int line = info->line_no; - rb_event_flag_t events = info->events; + if (prev_insn_info != info) { + int line = info->line_no; + rb_event_flag_t events = info->events; - if (line > 0 && last_line != line) { - rb_ary_push(body, INT2FIX(line)); - last_line = line; - } + if (line > 0 && last_line != line) { + rb_ary_push(body, INT2FIX(line)); + last_line = line; + } #define CHECK_EVENT(ev) if (events & ev) rb_ary_push(body, ID2SYM(rb_intern(#ev))); - CHECK_EVENT(RUBY_EVENT_LINE); - CHECK_EVENT(RUBY_EVENT_CLASS); - CHECK_EVENT(RUBY_EVENT_END); - CHECK_EVENT(RUBY_EVENT_CALL); - CHECK_EVENT(RUBY_EVENT_RETURN); - CHECK_EVENT(RUBY_EVENT_B_CALL); - CHECK_EVENT(RUBY_EVENT_B_RETURN); + CHECK_EVENT(RUBY_EVENT_LINE); + CHECK_EVENT(RUBY_EVENT_CLASS); + CHECK_EVENT(RUBY_EVENT_END); + CHECK_EVENT(RUBY_EVENT_CALL); + CHECK_EVENT(RUBY_EVENT_RETURN); + CHECK_EVENT(RUBY_EVENT_B_CALL); + CHECK_EVENT(RUBY_EVENT_B_RETURN); + CHECK_EVENT(RUBY_EVENT_RESCUE); #undef CHECK_EVENT - prev_insn_info = info; - } + prev_insn_info = info; + } - rb_ary_push(body, ary); - pos += RARRAY_LENINT(ary); /* reject too huge data */ + rb_ary_push(body, ary); + pos += RARRAY_LENINT(ary); /* reject too huge data */ } RB_GC_GUARD(nbody); RB_GC_GUARD(labels_wrapper); @@ -3034,14 +3438,15 @@ iseq_data_to_ary(const rb_iseq_t *iseq) rb_hash_aset(misc, ID2SYM(rb_intern("stack_max")), INT2FIX(iseq_body->stack_max)); rb_hash_aset(misc, ID2SYM(rb_intern("node_id")), INT2FIX(iseq_body->location.node_id)); rb_hash_aset(misc, ID2SYM(rb_intern("code_location")), - rb_ary_new_from_args(4, - INT2FIX(iseq_body->location.code_location.beg_pos.lineno), - INT2FIX(iseq_body->location.code_location.beg_pos.column), - INT2FIX(iseq_body->location.code_location.end_pos.lineno), - INT2FIX(iseq_body->location.code_location.end_pos.column))); + rb_ary_new_from_args(4, + INT2FIX(iseq_body->location.code_location.beg_pos.lineno), + INT2FIX(iseq_body->location.code_location.beg_pos.column), + INT2FIX(iseq_body->location.code_location.end_pos.lineno), + INT2FIX(iseq_body->location.code_location.end_pos.column))); #ifdef USE_ISEQ_NODE_ID rb_hash_aset(misc, ID2SYM(rb_intern("node_ids")), node_ids); #endif + rb_hash_aset(misc, ID2SYM(rb_intern("parser")), iseq_body->prism ? ID2SYM(rb_intern("prism")) : ID2SYM(rb_intern("parse.y"))); /* * [:magic, :major_version, :minor_version, :format_type, :misc, @@ -3056,7 +3461,7 @@ iseq_data_to_ary(const rb_iseq_t *iseq) rb_ary_push(val, iseq_body->location.label); rb_ary_push(val, rb_iseq_path(iseq)); rb_ary_push(val, rb_iseq_realpath(iseq)); - rb_ary_push(val, iseq_body->location.first_lineno); + rb_ary_push(val, RB_INT2NUM(iseq_body->location.first_lineno)); rb_ary_push(val, ID2SYM(type)); rb_ary_push(val, locals); rb_ary_push(val, params); @@ -3069,88 +3474,88 @@ VALUE rb_iseq_parameters(const rb_iseq_t *iseq, int is_proc) { int i, r; - const struct rb_iseq_constant_body *const body = iseq->body; + const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); const struct rb_iseq_param_keyword *const keyword = body->param.keyword; VALUE a, args = rb_ary_new2(body->param.size); ID req, opt, rest, block, key, keyrest; #define PARAM_TYPE(type) rb_ary_push(a = rb_ary_new2(2), ID2SYM(type)) #define PARAM_ID(i) body->local_table[(i)] #define PARAM(i, type) ( \ - PARAM_TYPE(type), \ - rb_id2str(PARAM_ID(i)) ? \ - rb_ary_push(a, ID2SYM(PARAM_ID(i))) : \ - a) + PARAM_TYPE(type), \ + rb_id2str(PARAM_ID(i)) ? \ + rb_ary_push(a, ID2SYM(PARAM_ID(i))) : \ + a) CONST_ID(req, "req"); CONST_ID(opt, "opt"); if (is_proc) { - for (i = 0; i < body->param.lead_num; i++) { - PARAM_TYPE(opt); - rb_ary_push(a, rb_id2str(PARAM_ID(i)) ? ID2SYM(PARAM_ID(i)) : Qnil); - rb_ary_push(args, a); - } + for (i = 0; i < body->param.lead_num; i++) { + PARAM_TYPE(opt); + rb_ary_push(a, rb_id2str(PARAM_ID(i)) ? ID2SYM(PARAM_ID(i)) : Qnil); + rb_ary_push(args, a); + } } else { - for (i = 0; i < body->param.lead_num; i++) { - rb_ary_push(args, PARAM(i, req)); - } + for (i = 0; i < body->param.lead_num; i++) { + rb_ary_push(args, PARAM(i, req)); + } } r = body->param.lead_num + body->param.opt_num; for (; i < r; i++) { - PARAM_TYPE(opt); - if (rb_id2str(PARAM_ID(i))) { - rb_ary_push(a, ID2SYM(PARAM_ID(i))); - } - rb_ary_push(args, a); + PARAM_TYPE(opt); + if (rb_id2str(PARAM_ID(i))) { + rb_ary_push(a, ID2SYM(PARAM_ID(i))); + } + rb_ary_push(args, a); } if (body->param.flags.has_rest) { - CONST_ID(rest, "rest"); - rb_ary_push(args, PARAM(body->param.rest_start, rest)); + CONST_ID(rest, "rest"); + rb_ary_push(args, PARAM(body->param.rest_start, rest)); } r = body->param.post_start + body->param.post_num; if (is_proc) { - for (i = body->param.post_start; i < r; i++) { - PARAM_TYPE(opt); - rb_ary_push(a, rb_id2str(PARAM_ID(i)) ? ID2SYM(PARAM_ID(i)) : Qnil); - rb_ary_push(args, a); - } + for (i = body->param.post_start; i < r; i++) { + PARAM_TYPE(opt); + rb_ary_push(a, rb_id2str(PARAM_ID(i)) ? ID2SYM(PARAM_ID(i)) : Qnil); + rb_ary_push(args, a); + } } else { - for (i = body->param.post_start; i < r; i++) { - rb_ary_push(args, PARAM(i, req)); - } + for (i = body->param.post_start; i < r; i++) { + rb_ary_push(args, PARAM(i, req)); + } } if (body->param.flags.accepts_no_kwarg) { - ID nokey; - CONST_ID(nokey, "nokey"); - PARAM_TYPE(nokey); - rb_ary_push(args, a); + ID nokey; + CONST_ID(nokey, "nokey"); + PARAM_TYPE(nokey); + rb_ary_push(args, a); } if (body->param.flags.has_kw) { - i = 0; - if (keyword->required_num > 0) { - ID keyreq; - CONST_ID(keyreq, "keyreq"); - for (; i < keyword->required_num; i++) { - PARAM_TYPE(keyreq); - if (rb_id2str(keyword->table[i])) { - rb_ary_push(a, ID2SYM(keyword->table[i])); - } - rb_ary_push(args, a); - } - } - CONST_ID(key, "key"); - for (; i < keyword->num; i++) { - PARAM_TYPE(key); - if (rb_id2str(keyword->table[i])) { - rb_ary_push(a, ID2SYM(keyword->table[i])); - } - rb_ary_push(args, a); - } + i = 0; + if (keyword->required_num > 0) { + ID keyreq; + CONST_ID(keyreq, "keyreq"); + for (; i < keyword->required_num; i++) { + PARAM_TYPE(keyreq); + if (rb_id2str(keyword->table[i])) { + rb_ary_push(a, ID2SYM(keyword->table[i])); + } + rb_ary_push(args, a); + } + } + CONST_ID(key, "key"); + for (; i < keyword->num; i++) { + PARAM_TYPE(key); + if (rb_id2str(keyword->table[i])) { + rb_ary_push(a, ID2SYM(keyword->table[i])); + } + rb_ary_push(args, a); + } } if (body->param.flags.has_kwrest || body->param.flags.ruby2_keywords) { ID param; - CONST_ID(keyrest, "keyrest"); + CONST_ID(keyrest, "keyrest"); PARAM_TYPE(keyrest); if (body->param.flags.has_kwrest && rb_id2str(param = PARAM_ID(keyword->rest_start))) { @@ -3159,11 +3564,11 @@ rb_iseq_parameters(const rb_iseq_t *iseq, int is_proc) else if (body->param.flags.ruby2_keywords) { rb_ary_push(a, ID2SYM(idPow)); } - rb_ary_push(args, a); + rb_ary_push(args, a); } if (body->param.flags.has_block) { - CONST_ID(block, "block"); - rb_ary_push(args, PARAM(body->param.block_start, block)); + CONST_ID(block, "block"); + rb_ary_push(args, PARAM(body->param.block_start, block)); } return args; } @@ -3172,20 +3577,20 @@ VALUE rb_iseq_defined_string(enum defined_type type) { static const char expr_names[][18] = { - "nil", - "instance-variable", - "local-variable", - "global-variable", - "class variable", - "constant", - "method", - "yield", - "super", - "self", - "true", - "false", - "assignment", - "expression", + "nil", + "instance-variable", + "local-variable", + "global-variable", + "class variable", + "constant", + "method", + "yield", + "super", + "self", + "true", + "false", + "assignment", + "expression", }; const char *estr; @@ -3207,6 +3612,12 @@ typedef struct insn_data_struct { static insn_data_t insn_data[VM_INSTRUCTION_SIZE/2]; void +rb_free_encoded_insn_data(void) +{ + st_free_table(encoded_insn_data); +} + +void rb_vm_encoded_insn_data_table_init(void) { #if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE @@ -3272,7 +3683,7 @@ rb_vm_insn_addr2opcode(const void *addr) rb_bug("rb_vm_insn_addr2opcode: invalid insn address: %p", addr); } -// Decode `iseq->body->iseq_encoded[i]` to an insn. +// Decode `ISEQ_BODY(iseq)->iseq_encoded[i]` to an insn. int rb_vm_insn_decode(const VALUE encoded) { @@ -3305,17 +3716,34 @@ encoded_iseq_trace_instrument(VALUE *iseq_encoded_insn, rb_event_flag_t turnon, void rb_iseq_trace_flag_cleared(const rb_iseq_t *iseq, size_t pos) { - const struct rb_iseq_constant_body *const body = iseq->body; + const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); VALUE *iseq_encoded = (VALUE *)body->iseq_encoded; encoded_iseq_trace_instrument(&iseq_encoded[pos], 0, false); } +// We need to fire call events on instructions with b_call events if the block +// is running as a method. So, if we are listening for call events, then +// instructions that have b_call events need to become trace variants. +// Use this function when making decisions about recompiling to trace variants. +static inline rb_event_flag_t +add_bmethod_events(rb_event_flag_t events) +{ + if (events & RUBY_EVENT_CALL) { + events |= RUBY_EVENT_B_CALL; + } + if (events & RUBY_EVENT_RETURN) { + events |= RUBY_EVENT_B_RETURN; + } + return events; +} + +// Note, to support call/return events for bmethods, turnon_event can have more events than tpval. static int iseq_add_local_tracepoint(const rb_iseq_t *iseq, rb_event_flag_t turnon_events, VALUE tpval, unsigned int target_line) { unsigned int pc; int n = 0; - const struct rb_iseq_constant_body *const body = iseq->body; + const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); VALUE *iseq_encoded = (VALUE *)body->iseq_encoded; VM_ASSERT(ISEQ_EXECUTABLE_P(iseq)); @@ -3342,6 +3770,7 @@ iseq_add_local_tracepoint(const rb_iseq_t *iseq, rb_event_flag_t turnon_events, if (n > 0) { if (iseq->aux.exec.local_hooks == NULL) { ((rb_iseq_t *)iseq)->aux.exec.local_hooks = RB_ZALLOC(rb_hook_list_t); + iseq->aux.exec.local_hooks->is_local = true; } rb_hook_list_connect_tracepoint((VALUE)iseq, iseq->aux.exec.local_hooks, tpval, target_line); } @@ -3365,9 +3794,12 @@ iseq_add_local_tracepoint_i(const rb_iseq_t *iseq, void *p) } int -rb_iseq_add_local_tracepoint_recursively(const rb_iseq_t *iseq, rb_event_flag_t turnon_events, VALUE tpval, unsigned int target_line) +rb_iseq_add_local_tracepoint_recursively(const rb_iseq_t *iseq, rb_event_flag_t turnon_events, VALUE tpval, unsigned int target_line, bool target_bmethod) { struct trace_set_local_events_struct data; + if (target_bmethod) { + turnon_events = add_bmethod_events(turnon_events); + } data.turnon_events = turnon_events; data.tpval = tpval; data.target_line = target_line; @@ -3385,7 +3817,7 @@ iseq_remove_local_tracepoint(const rb_iseq_t *iseq, VALUE tpval) if (iseq->aux.exec.local_hooks) { unsigned int pc; - const struct rb_iseq_constant_body *const body = iseq->body; + const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); VALUE *iseq_encoded = (VALUE *)body->iseq_encoded; rb_event_flag_t local_events = 0; @@ -3393,12 +3825,11 @@ iseq_remove_local_tracepoint(const rb_iseq_t *iseq, VALUE tpval) local_events = iseq->aux.exec.local_hooks->events; if (local_events == 0) { - if (iseq->aux.exec.local_hooks->running == 0) { - rb_hook_list_free(iseq->aux.exec.local_hooks); - } + rb_hook_list_free(iseq->aux.exec.local_hooks); ((rb_iseq_t *)iseq)->aux.exec.local_hooks = NULL; } + local_events = add_bmethod_events(local_events); for (pc = 0; pc<body->iseq_size;) { rb_event_flag_t pc_events = rb_iseq_event_flags(iseq, pc); pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & (local_events | iseq->aux.exec.global_trace_events), false); @@ -3435,32 +3866,55 @@ void rb_iseq_trace_set(const rb_iseq_t *iseq, rb_event_flag_t turnon_events) { if (iseq->aux.exec.global_trace_events == turnon_events) { - return; + return; } if (!ISEQ_EXECUTABLE_P(iseq)) { - /* this is building ISeq */ - return; + /* this is building ISeq */ + return; } else { unsigned int pc; - const struct rb_iseq_constant_body *const body = iseq->body; - VALUE *iseq_encoded = (VALUE *)body->iseq_encoded; + const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); + VALUE *iseq_encoded = (VALUE *)body->iseq_encoded; rb_event_flag_t enabled_events; rb_event_flag_t local_events = iseq->aux.exec.local_hooks ? iseq->aux.exec.local_hooks->events : 0; ((rb_iseq_t *)iseq)->aux.exec.global_trace_events = turnon_events; - enabled_events = turnon_events | local_events; + enabled_events = add_bmethod_events(turnon_events | local_events); for (pc=0; pc<body->iseq_size;) { rb_event_flag_t pc_events = rb_iseq_event_flags(iseq, pc); pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & enabled_events, true); - } + } } } -bool rb_vm_call_ivar_attrset_p(const vm_call_handler ch); void rb_vm_cc_general(const struct rb_callcache *cc); +static bool +clear_attr_cc(VALUE v) +{ + if (imemo_type_p(v, imemo_callcache) && vm_cc_ivar_p((const struct rb_callcache *)v)) { + rb_vm_cc_general((struct rb_callcache *)v); + return true; + } + else { + return false; + } +} + +static bool +clear_bf_cc(VALUE v) +{ + if (imemo_type_p(v, imemo_callcache) && vm_cc_bf_p((const struct rb_callcache *)v)) { + rb_vm_cc_general((struct rb_callcache *)v); + return true; + } + else { + return false; + } +} + static int clear_attr_ccs_i(void *vstart, void *vend, size_t stride, void *data) { @@ -3468,11 +3922,7 @@ clear_attr_ccs_i(void *vstart, void *vend, size_t stride, void *data) for (; v != (VALUE)vend; v += stride) { void *ptr = asan_poisoned_object_p(v); asan_unpoison_object(v, false); - - if (imemo_type_p(v, imemo_callcache) && rb_vm_call_ivar_attrset_p(((const struct rb_callcache *)v)->call_)) { - rb_vm_cc_general((struct rb_callcache *)v); - } - + clear_attr_cc(v); asan_poison_object_if(ptr, v); } return 0; @@ -3485,6 +3935,25 @@ rb_clear_attr_ccs(void) } static int +clear_bf_ccs_i(void *vstart, void *vend, size_t stride, void *data) +{ + VALUE v = (VALUE)vstart; + for (; v != (VALUE)vend; v += stride) { + void *ptr = asan_poisoned_object_p(v); + asan_unpoison_object(v, false); + clear_bf_cc(v); + asan_poison_object_if(ptr, v); + } + return 0; +} + +void +rb_clear_bf_ccs(void) +{ + rb_objspace_each_objects(clear_bf_ccs_i, NULL); +} + +static int trace_set_i(void *vstart, void *vend, size_t stride, void *data) { rb_event_flag_t turnon_events = *(rb_event_flag_t *)data; @@ -3494,11 +3963,12 @@ trace_set_i(void *vstart, void *vend, size_t stride, void *data) void *ptr = asan_poisoned_object_p(v); asan_unpoison_object(v, false); - if (rb_obj_is_iseq(v)) { - rb_iseq_trace_set(rb_iseq_check((rb_iseq_t *)v), turnon_events); - } - else if (imemo_type_p(v, imemo_callcache) && rb_vm_call_ivar_attrset_p(((const struct rb_callcache *)v)->call_)) { - rb_vm_cc_general((struct rb_callcache *)v); + if (rb_obj_is_iseq(v)) { + rb_iseq_trace_set(rb_iseq_check((rb_iseq_t *)v), turnon_events); + } + else if (clear_attr_cc(v)) { + } + else if (clear_bf_cc(v)) { } asan_poison_object_if(ptr, v); @@ -3611,9 +4081,9 @@ iseqw_s_load_from_binary_extra_data(VALUE self, VALUE str) struct succ_index_table { uint64_t imm_part[IMMEDIATE_TABLE_SIZE / 9]; struct succ_dict_block { - unsigned int rank; - uint64_t small_block_ranks; /* 9 bits * 7 = 63 bits */ - uint64_t bits[512/64]; + unsigned int rank; + uint64_t small_block_ranks; /* 9 bits * 7 = 63 bits */ + uint64_t bits[512/64]; } succ_part[FLEX_ARY_LEN]; }; @@ -3635,27 +4105,27 @@ succ_index_table_create(int max_pos, int *data, int size) r = 0; for (j = 0; j < imm_size; j++) { - for (i = 0; i < 9; i++) { - if (r < size && data[r] == j * 9 + i) r++; - imm_block_rank_set(sd->imm_part[j], i, r); - } + for (i = 0; i < 9; i++) { + if (r < size && data[r] == j * 9 + i) r++; + imm_block_rank_set(sd->imm_part[j], i, r); + } } for (k = 0; k < succ_size; k++) { - struct succ_dict_block *sd_block = &sd->succ_part[k]; - int small_rank = 0; - sd_block->rank = r; - for (j = 0; j < 8; j++) { - uint64_t bits = 0; - if (j) small_block_rank_set(sd_block->small_block_ranks, j, small_rank); - for (i = 0; i < 64; i++) { - if (r < size && data[r] == k * 512 + j * 64 + i + IMMEDIATE_TABLE_SIZE) { - bits |= ((uint64_t)1) << i; - r++; - } - } - sd_block->bits[j] = bits; - small_rank += rb_popcount64(bits); - } + struct succ_dict_block *sd_block = &sd->succ_part[k]; + int small_rank = 0; + sd_block->rank = r; + for (j = 0; j < 8; j++) { + uint64_t bits = 0; + if (j) small_block_rank_set(sd_block->small_block_ranks, j, small_rank); + for (i = 0; i < 64; i++) { + if (r < size && data[r] == k * 512 + j * 64 + i + IMMEDIATE_TABLE_SIZE) { + bits |= ((uint64_t)1) << i; + r++; + } + } + sd_block->bits[j] = bits; + small_rank += rb_popcount64(bits); + } } return sd; } @@ -3669,20 +4139,20 @@ succ_index_table_invert(int max_pos, struct succ_index_table *sd, int size) int i, j, k, r = -1; p = positions; for (j = 0; j < imm_size; j++) { - for (i = 0; i < 9; i++) { - int nr = imm_block_rank_get(sd->imm_part[j], i); - if (r != nr) *p++ = j * 9 + i; - r = nr; - } + for (i = 0; i < 9; i++) { + int nr = imm_block_rank_get(sd->imm_part[j], i); + if (r != nr) *p++ = j * 9 + i; + r = nr; + } } for (k = 0; k < succ_size; k++) { - for (j = 0; j < 8; j++) { - for (i = 0; i < 64; i++) { - if (sd->succ_part[k].bits[j] & (((uint64_t)1) << i)) { - *p++ = k * 512 + j * 64 + i + IMMEDIATE_TABLE_SIZE; - } - } - } + for (j = 0; j < 8; j++) { + for (i = 0; i < 64; i++) { + if (sd->succ_part[k].bits[j] & (((uint64_t)1) << i)) { + *p++ = k * 512 + j * 64 + i + IMMEDIATE_TABLE_SIZE; + } + } + } } return positions; } @@ -3691,19 +4161,19 @@ static int succ_index_lookup(const struct succ_index_table *sd, int x) { if (x < IMMEDIATE_TABLE_SIZE) { - const int i = x / 9; - const int j = x % 9; - return imm_block_rank_get(sd->imm_part[i], j); + const int i = x / 9; + const int j = x % 9; + return imm_block_rank_get(sd->imm_part[i], j); } else { - const int block_index = (x - IMMEDIATE_TABLE_SIZE) / 512; - const struct succ_dict_block *block = &sd->succ_part[block_index]; - const int block_bit_index = (x - IMMEDIATE_TABLE_SIZE) % 512; - const int small_block_index = block_bit_index / 64; - const int small_block_popcount = small_block_rank_get(block->small_block_ranks, small_block_index); - const int popcnt = rb_popcount64(block->bits[small_block_index] << (63 - block_bit_index % 64)); + const int block_index = (x - IMMEDIATE_TABLE_SIZE) / 512; + const struct succ_dict_block *block = &sd->succ_part[block_index]; + const int block_bit_index = (x - IMMEDIATE_TABLE_SIZE) % 512; + const int small_block_index = block_bit_index / 64; + const int small_block_popcount = small_block_rank_get(block->small_block_ranks, small_block_index); + const int popcnt = rb_popcount64(block->bits[small_block_index] << (63 - block_bit_index % 64)); - return block->rank + small_block_popcount + popcnt; + return block->rank + small_block_popcount + popcnt; } } #endif @@ -3713,7 +4183,7 @@ succ_index_lookup(const struct succ_index_table *sd, int x) * call-seq: * iseq.script_lines -> array or nil * - * It returns recorded script lines if it is availalble. + * It returns recorded script lines if it is available. * The script lines are not limited to the iseq range, but * are entire lines of the source file. * @@ -3725,7 +4195,7 @@ static VALUE iseqw_script_lines(VALUE self) { const rb_iseq_t *iseq = iseqw_check(self); - return iseq->body->variable.script_lines; + return ISEQ_BODY(iseq)->variable.script_lines; } /* @@ -3768,7 +4238,6 @@ Init_ISeq(void) rb_define_singleton_method(rb_cISeq, "load_from_binary", iseqw_s_load_from_binary, 1); rb_define_singleton_method(rb_cISeq, "load_from_binary_extra_data", iseqw_s_load_from_binary_extra_data, 1); - /* location APIs */ rb_define_method(rb_cISeq, "path", iseqw_path, 0); rb_define_method(rb_cISeq, "absolute_path", iseqw_absolute_path, 0); @@ -3787,6 +4256,8 @@ Init_ISeq(void) (void)iseq_s_load; rb_define_singleton_method(rb_cISeq, "compile", iseqw_s_compile, -1); + rb_define_singleton_method(rb_cISeq, "compile_prism", iseqw_s_compile_prism, -1); + rb_define_singleton_method(rb_cISeq, "compile_file_prism", iseqw_s_compile_file_prism, -1); rb_define_singleton_method(rb_cISeq, "new", iseqw_s_compile, -1); rb_define_singleton_method(rb_cISeq, "compile_file", iseqw_s_compile_file, -1); rb_define_singleton_method(rb_cISeq, "compile_option", iseqw_s_compile_option_get, 0); |