diff options
Diffstat (limited to 'iseq.c')
| -rw-r--r-- | iseq.c | 1368 |
1 files changed, 1043 insertions, 325 deletions
@@ -19,6 +19,7 @@ #endif #include "eval_intern.h" +#include "id.h" #include "id_table.h" #include "internal.h" #include "internal/bits.h" @@ -28,21 +29,24 @@ #include "internal/file.h" #include "internal/gc.h" #include "internal/hash.h" -#include "internal/parse.h" +#include "internal/io.h" +#include "internal/ruby_parser.h" #include "internal/sanitizers.h" +#include "internal/set_table.h" #include "internal/symbol.h" #include "internal/thread.h" #include "internal/variable.h" #include "iseq.h" -#include "rjit.h" #include "ruby/util.h" #include "vm_core.h" +#include "ractor_core.h" #include "vm_callinfo.h" #include "yjit.h" #include "ruby/ractor.h" #include "builtin.h" #include "insns.inc" #include "insns_info.inc" +#include "zjit.h" VALUE rb_cISeq; static VALUE iseqw_new(const rb_iseq_t *iseq); @@ -84,7 +88,7 @@ free_arena(struct iseq_compile_data_storage *cur) while (cur) { next = cur->next; - ruby_xfree(cur); + ruby_xfree_sized(cur, offsetof(struct iseq_compile_data_storage, buff) + cur->size * sizeof(char)); cur = next; } } @@ -98,7 +102,7 @@ compile_data_free(struct iseq_compile_data *compile_data) if (compile_data->ivar_cache_table) { rb_id_table_free(compile_data->ivar_cache_table); } - ruby_xfree(compile_data); + SIZED_FREE(compile_data); } } @@ -109,13 +113,15 @@ remove_from_constant_cache(ID id, IC ic) VALUE lookup_result; st_data_t ic_data = (st_data_t)ic; - if (rb_id_table_lookup(vm->constant_cache, id, &lookup_result)) { - st_table *ics = (st_table *)lookup_result; - st_delete(ics, &ic_data, NULL); + if (rb_id_table_lookup(&vm->constant_cache, id, &lookup_result)) { + set_table *ics = (set_table *)lookup_result; + set_table_delete(ics, &ic_data); - if (ics->num_entries == 0) { - rb_id_table_delete(vm->constant_cache, id); - st_free_table(ics); + if (ics->num_entries == 0 && + // See comment in vm_track_constant_cache on why we need this check + id != vm->inserting_constant_cache_id) { + rb_id_table_delete(&vm->constant_cache, id); + set_free_table(ics); } } } @@ -146,14 +152,33 @@ iseq_clear_ic_references(const rb_iseq_t *iseq) if (segments == NULL) continue; - for (int i = 0; segments[i]; i++) { + int i; + for (i = 0; segments[i]; i++) { ID id = segments[i]; if (id == idNULL) continue; remove_from_constant_cache(id, ic); } - ruby_xfree((void *)segments); + SIZED_FREE_N(segments, i + 1); + } +} + + +rb_hook_list_t * +rb_iseq_local_hooks(const rb_iseq_t *iseq, rb_ractor_t *r, bool create) +{ + rb_hook_list_t *hook_list = NULL; + st_data_t val; + if (st_lookup(rb_ractor_targeted_hooks(r), (st_data_t)iseq, &val)) { + hook_list = (rb_hook_list_t*)val; + RUBY_ASSERT(hook_list->type == hook_list_type_targeted_iseq); + } + else if (create) { + hook_list = RB_ZALLOC(rb_hook_list_t); + hook_list->type = hook_list_type_targeted_iseq; + st_insert(rb_ractor_targeted_hooks(r), (st_data_t)iseq, (st_data_t)hook_list); } + return hook_list; } void @@ -164,40 +189,51 @@ rb_iseq_free(const rb_iseq_t *iseq) if (iseq && ISEQ_BODY(iseq)) { iseq_clear_ic_references(iseq); struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); - rb_rjit_free_iseq(iseq); /* Notify RJIT */ #if USE_YJIT - rb_yjit_iseq_free(body->yjit_payload); + rb_yjit_iseq_free(iseq); + if (FL_TEST_RAW((VALUE)iseq, ISEQ_TRANSLATED)) { + RUBY_ASSERT(rb_yjit_live_iseq_count > 0); + rb_yjit_live_iseq_count--; + } +#endif +#if USE_ZJIT + rb_zjit_iseq_free(iseq); #endif - ruby_xfree((void *)body->iseq_encoded); - ruby_xfree((void *)body->insns_info.body); - if (body->insns_info.positions) ruby_xfree((void *)body->insns_info.positions); + SIZED_FREE_N(body->iseq_encoded, body->iseq_size); + SIZED_FREE_N(body->insns_info.body, body->insns_info.size); + SIZED_FREE_N(body->insns_info.positions, body->insns_info.size); #if VM_INSN_INFO_TABLE_IMPL == 2 - if (body->insns_info.succ_index_table) ruby_xfree(body->insns_info.succ_index_table); + ruby_xfree(body->insns_info.succ_index_table); #endif - if (LIKELY(body->local_table != rb_iseq_shared_exc_local_tbl)) - ruby_xfree((void *)body->local_table); - ruby_xfree((void *)body->is_entries); - - if (body->call_data) { - ruby_xfree(body->call_data); + SIZED_FREE_N(body->is_entries, ISEQ_IS_SIZE(body)); + SIZED_FREE_N(body->call_data, body->ci_size); + if (body->catch_table) { + ruby_xfree_sized(body->catch_table, iseq_catch_table_bytes(body->catch_table->size)); } - ruby_xfree((void *)body->catch_table); - ruby_xfree((void *)body->param.opt_table); + SIZED_FREE_N(body->param.opt_table, body->param.opt_num + 1); if (ISEQ_MBITS_BUFLEN(body->iseq_size) > 1 && body->mark_bits.list) { - ruby_xfree((void *)body->mark_bits.list); + SIZED_FREE_N(body->mark_bits.list, ISEQ_MBITS_BUFLEN(body->iseq_size)); } - if (body->param.keyword != NULL) { - ruby_xfree((void *)body->param.keyword->default_values); - ruby_xfree((void *)body->param.keyword); + ISEQ_ORIGINAL_ISEQ_CLEAR(iseq); + + struct rb_iseq_param_keyword *pkw = (struct rb_iseq_param_keyword *)body->param.keyword; + if (pkw != NULL) { + if (pkw->table != &body->local_table[pkw->bits_start - pkw->num]) + SIZED_FREE_N(pkw->table, pkw->required_num); + if (pkw->default_values) { + SIZED_FREE_N(pkw->default_values, pkw->num - pkw->required_num); + } + SIZED_FREE(pkw); } + if (LIKELY(body->local_table != rb_iseq_shared_exc_local_tbl)) { + SIZED_FREE_N(body->local_table, body->local_table_size); + } + SIZED_FREE_N(body->lvar_states, body->local_table_size); + compile_data_free(ISEQ_COMPILE_DATA(iseq)); if (body->outer_variables) rb_id_table_free(body->outer_variables); - ruby_xfree(body); - } - - if (iseq && ISEQ_EXECUTABLE_P(iseq) && iseq->aux.exec.local_hooks) { - rb_hook_list_free(iseq->aux.exec.local_hooks); + SIZED_FREE(body); } RUBY_FREE_LEAVE("iseq"); @@ -224,7 +260,30 @@ iseq_scan_bits(unsigned int page, iseq_bits_t bits, VALUE *code, VALUE *original } static void -rb_iseq_mark_and_move_each_value(const rb_iseq_t *iseq, VALUE *original_iseq) +rb_iseq_mark_and_move_each_compile_data_value(const rb_iseq_t *iseq, VALUE *original_iseq) +{ + unsigned int size; + VALUE *code; + const struct iseq_compile_data *const compile_data = ISEQ_COMPILE_DATA(iseq); + + size = compile_data->iseq_size; + code = compile_data->iseq_encoded; + + // Embedded VALUEs + if (compile_data->mark_bits.list) { + if(compile_data->is_single_mark_bit) { + iseq_scan_bits(0, compile_data->mark_bits.single, code, original_iseq); + } + else { + for (unsigned int i = 0; i < ISEQ_MBITS_BUFLEN(size); i++) { + iseq_bits_t bits = compile_data->mark_bits.list[i]; + iseq_scan_bits(i, bits, code, original_iseq); + } + } + } +} +static void +rb_iseq_mark_and_move_each_body_value(const rb_iseq_t *iseq, VALUE *original_iseq) { unsigned int size; VALUE *code; @@ -243,9 +302,7 @@ rb_iseq_mark_and_move_each_value(const rb_iseq_t *iseq, VALUE *original_iseq) for (unsigned int i = 0; i < body->icvarc_size; i++, is_entries++) { ICVARC icvarc = (ICVARC)is_entries; if (icvarc->entry) { - RUBY_ASSERT(!RB_TYPE_P(icvarc->entry->class_value, T_NONE)); - - rb_gc_mark_and_move(&icvarc->entry->class_value); + rb_gc_mark_and_move((VALUE *)&icvarc->entry); } } @@ -272,14 +329,37 @@ rb_iseq_mark_and_move_each_value(const rb_iseq_t *iseq, VALUE *original_iseq) iseq_scan_bits(0, body->mark_bits.single, code, original_iseq); } else { - if (body->mark_bits.list) { - for (unsigned int i = 0; i < ISEQ_MBITS_BUFLEN(size); i++) { - iseq_bits_t bits = body->mark_bits.list[i]; - iseq_scan_bits(i, bits, code, original_iseq); - } + for (unsigned int i = 0; i < ISEQ_MBITS_BUFLEN(size); i++) { + iseq_bits_t bits = body->mark_bits.list[i]; + iseq_scan_bits(i, bits, code, original_iseq); + } + } + } +} + +static bool +cc_is_active(const struct rb_callcache *cc, bool reference_updating) +{ + if (cc) { + if (cc == rb_vm_empty_cc() || rb_vm_empty_cc_for_super()) { + return false; + } + + if (reference_updating) { + cc = (const struct rb_callcache *)rb_gc_location((VALUE)cc); + } + + if (vm_cc_markable(cc) && vm_cc_valid(cc)) { + const struct rb_callable_method_entry_struct *cme = vm_cc_cme(cc); + if (reference_updating) { + cme = (const struct rb_callable_method_entry_struct *)rb_gc_location((VALUE)cme); + } + if (!METHOD_ENTRY_INVALIDATED(cme)) { + return true; } } } + return false; } void @@ -292,10 +372,8 @@ rb_iseq_mark_and_move(rb_iseq_t *iseq, bool reference_updating) if (ISEQ_BODY(iseq)) { struct rb_iseq_constant_body *body = ISEQ_BODY(iseq); - rb_iseq_mark_and_move_each_value(iseq, reference_updating ? ISEQ_ORIGINAL_ISEQ(iseq) : NULL); + rb_iseq_mark_and_move_each_body_value(iseq, reference_updating ? ISEQ_ORIGINAL_ISEQ(iseq) : NULL); - rb_gc_mark_and_move(&body->variable.coverage); - rb_gc_mark_and_move(&body->variable.pc2branchindex); rb_gc_mark_and_move(&body->variable.script_lines); rb_gc_mark_and_move(&body->location.label); rb_gc_mark_and_move(&body->location.base_label); @@ -310,36 +388,22 @@ rb_iseq_mark_and_move(rb_iseq_t *iseq, bool reference_updating) if (cds[i].ci) rb_gc_mark_and_move_ptr(&cds[i].ci); - const struct rb_callcache *cc = cds[i].cc; - if (cc) { - if (reference_updating) { - cc = (const struct rb_callcache *)rb_gc_location((VALUE)cc); - } - - if (vm_cc_markable(cc)) { - VM_ASSERT((cc->flags & VM_CALLCACHE_ON_STACK) == 0); - - const struct rb_callable_method_entry_struct *cme = vm_cc_cme(cc); - if (reference_updating) { - cme = (const struct rb_callable_method_entry_struct *)rb_gc_location((VALUE)cme); - } - - if (cc->klass && !METHOD_ENTRY_INVALIDATED(cme)) { - rb_gc_mark_and_move_ptr(&cds[i].cc); - } - else { - cds[i].cc = rb_vm_empty_cc(); - } - } + if (cc_is_active(cds[i].cc, reference_updating)) { + rb_gc_mark_and_move_ptr(&cds[i].cc); + } + else if (cds[i].cc != rb_vm_empty_cc()) { + cds[i].cc = rb_vm_empty_cc(); } } } - if (body->param.flags.has_kw && ISEQ_COMPILE_DATA(iseq) == NULL) { + if (body->param.flags.has_kw && body->param.keyword != NULL) { const struct rb_iseq_param_keyword *const keyword = body->param.keyword; - for (int j = 0, i = keyword->required_num; i < keyword->num; i++, j++) { - rb_gc_mark_and_move(&keyword->default_values[j]); + if (keyword->default_values != NULL) { + for (int j = 0, i = keyword->required_num; i < keyword->num; i++, j++) { + rb_gc_mark_and_move(&keyword->default_values[j]); + } } } @@ -356,41 +420,51 @@ rb_iseq_mark_and_move(rb_iseq_t *iseq, bool reference_updating) } if (reference_updating) { -#if USE_RJIT - rb_rjit_iseq_update_references(body); -#endif #if USE_YJIT - rb_yjit_iseq_update_references(body->yjit_payload); + rb_yjit_iseq_update_references(iseq); +#endif +#if USE_ZJIT + rb_zjit_iseq_update_references(body->zjit_payload); #endif } else { -#if USE_RJIT - rb_rjit_iseq_mark(body->rjit_blocks); -#endif + // TODO: check jit payload + if (!rb_gc_checking_shareable()) { #if USE_YJIT - rb_yjit_iseq_mark(body->yjit_payload); + rb_yjit_iseq_mark(body->yjit_payload); +#endif +#if USE_ZJIT + rb_zjit_iseq_mark(body->zjit_payload); #endif + } + } + + // TODO: ractor aware coverage + if (!rb_gc_checking_shareable()) { + rb_gc_mark_and_move(&body->variable.coverage); + rb_gc_mark_and_move(&body->variable.pc2branchindex); } } if (FL_TEST_RAW((VALUE)iseq, ISEQ_NOT_LOADED_YET)) { - rb_gc_mark_and_move(&iseq->aux.loader.obj); + if (!rb_gc_checking_shareable()) { + rb_gc_mark_and_move(&iseq->aux.loader.obj); + } } else if (FL_TEST_RAW((VALUE)iseq, ISEQ_USE_COMPILE_DATA)) { - const struct iseq_compile_data *const compile_data = ISEQ_COMPILE_DATA(iseq); + if (!rb_gc_checking_shareable()) { + const struct iseq_compile_data *const compile_data = ISEQ_COMPILE_DATA(iseq); - rb_iseq_mark_and_move_insn_storage(compile_data->insn.storage_head); + rb_iseq_mark_and_move_insn_storage(compile_data->insn.storage_head); + rb_iseq_mark_and_move_each_compile_data_value(iseq, reference_updating ? ISEQ_ORIGINAL_ISEQ(iseq) : NULL); - rb_gc_mark_and_move((VALUE *)&compile_data->err_info); - rb_gc_mark_and_move((VALUE *)&compile_data->catch_table_ary); + rb_gc_mark_and_move((VALUE *)&compile_data->err_info); + rb_gc_mark_and_move((VALUE *)&compile_data->catch_table_ary); + } } else { /* executable */ VM_ASSERT(ISEQ_EXECUTABLE_P(iseq)); - - if (iseq->aux.exec.local_hooks) { - rb_hook_list_mark_and_update(iseq->aux.exec.local_hooks); - } } RUBY_MARK_LEAVE("iseq"); @@ -495,9 +569,14 @@ rb_iseq_pathobj_new(VALUE path, VALUE realpath) pathobj = rb_fstring(path); } else { - if (!NIL_P(realpath)) realpath = rb_fstring(realpath); - pathobj = rb_ary_new_from_args(2, rb_fstring(path), realpath); - rb_obj_freeze(pathobj); + if (!NIL_P(realpath)) { + realpath = rb_fstring(realpath); + } + VALUE fpath = rb_fstring(path); + + pathobj = rb_ary_new_from_args(2, fpath, realpath); + rb_ary_freeze(pathobj); + RB_OBJ_SET_SHAREABLE(pathobj); } return pathobj; } @@ -509,6 +588,24 @@ rb_iseq_pathobj_set(const rb_iseq_t *iseq, VALUE path, VALUE realpath) rb_iseq_pathobj_new(path, realpath)); } +// Make a dummy iseq for a dummy frame that exposes a path for profilers to inspect +rb_iseq_t * +rb_iseq_alloc_with_dummy_path(VALUE fname) +{ + rb_iseq_t *dummy_iseq = iseq_alloc(); + + ISEQ_BODY(dummy_iseq)->type = ISEQ_TYPE_TOP; + + if (!RB_OBJ_SHAREABLE_P(fname)) { + RB_OBJ_SET_FROZEN_SHAREABLE(fname); + } + + RB_OBJ_WRITE(dummy_iseq, &ISEQ_BODY(dummy_iseq)->location.pathobj, fname); + RB_OBJ_WRITE(dummy_iseq, &ISEQ_BODY(dummy_iseq)->location.label, fname); + + return dummy_iseq; +} + static rb_iseq_location_t * iseq_location_setup(rb_iseq_t *iseq, VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_code_location_t *code_location, const int node_id) { @@ -518,6 +615,11 @@ iseq_location_setup(rb_iseq_t *iseq, VALUE name, VALUE path, VALUE realpath, int RB_OBJ_WRITE(iseq, &loc->label, name); RB_OBJ_WRITE(iseq, &loc->base_label, name); loc->first_lineno = first_lineno; + + if (ISEQ_BODY(iseq)->local_iseq == iseq && strcmp(RSTRING_PTR(name), "initialize") == 0) { + ISEQ_BODY(iseq)->param.flags.use_block = 1; + } + if (code_location) { loc->node_id = node_id; loc->code_location = *code_location; @@ -546,11 +648,11 @@ set_relation(rb_iseq_t *iseq, const rb_iseq_t *piseq) body->local_iseq = iseq; } else if (piseq) { - body->local_iseq = ISEQ_BODY(piseq)->local_iseq; + RB_OBJ_WRITE(iseq, &body->local_iseq, ISEQ_BODY(piseq)->local_iseq); } if (piseq) { - body->parent_iseq = piseq; + RB_OBJ_WRITE(iseq, &body->parent_iseq, piseq); } if (type == ISEQ_TYPE_MAIN) { @@ -573,6 +675,18 @@ new_arena(void) return new_arena; } +static int +prepare_node_id(const NODE *node) +{ + if (!node) return -1; + + if (nd_type(node) == NODE_SCOPE && RNODE_SCOPE(node)->nd_parent) { + return nd_node_id(RNODE_SCOPE(node)->nd_parent); + } + + return nd_node_id(node); +} + static VALUE prepare_iseq_build(rb_iseq_t *iseq, VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_code_location_t *code_location, const int node_id, @@ -646,7 +760,7 @@ rb_iseq_insns_info_encode_positions(const rb_iseq_t *iseq) if (body->insns_info.succ_index_table) ruby_xfree(body->insns_info.succ_index_table); body->insns_info.succ_index_table = succ_index_table_create(max_pos, data, size); #if VM_CHECK_MODE == 0 - ruby_xfree(body->insns_info.positions); + SIZED_FREE_N(body->insns_info.positions, body->insns_info.size); body->insns_info.positions = NULL; #endif #endif @@ -700,19 +814,26 @@ finish_iseq_build(rb_iseq_t *iseq) } static rb_compile_option_t COMPILE_OPTION_DEFAULT = { - OPT_INLINE_CONST_CACHE, /* int inline_const_cache; */ - OPT_PEEPHOLE_OPTIMIZATION, /* int peephole_optimization; */ - OPT_TAILCALL_OPTIMIZATION, /* int tailcall_optimization */ - OPT_SPECIALISED_INSTRUCTION, /* int specialized_instruction; */ - OPT_OPERANDS_UNIFICATION, /* int operands_unification; */ - OPT_INSTRUCTIONS_UNIFICATION, /* int instructions_unification; */ - OPT_STACK_CACHING, /* int stack_caching; */ - OPT_FROZEN_STRING_LITERAL, - OPT_DEBUG_FROZEN_STRING_LITERAL, - TRUE, /* coverage_enabled */ + .inline_const_cache = OPT_INLINE_CONST_CACHE, + .peephole_optimization = OPT_PEEPHOLE_OPTIMIZATION, + .tailcall_optimization = OPT_TAILCALL_OPTIMIZATION, + .specialized_instruction = OPT_SPECIALISED_INSTRUCTION, + .operands_unification = OPT_OPERANDS_UNIFICATION, + .instructions_unification = OPT_INSTRUCTIONS_UNIFICATION, + .frozen_string_literal = OPT_FROZEN_STRING_LITERAL, + .debug_frozen_string_literal = OPT_DEBUG_FROZEN_STRING_LITERAL, + .coverage_enabled = TRUE, +}; + +static const rb_compile_option_t COMPILE_OPTION_FALSE = { + .frozen_string_literal = -1, // unspecified }; -static const rb_compile_option_t COMPILE_OPTION_FALSE = {0}; +int +rb_iseq_opt_frozen_string_literal(void) +{ + return COMPILE_OPTION_DEFAULT.frozen_string_literal; +} static void set_compile_option_from_hash(rb_compile_option_t *option, VALUE opt) @@ -723,7 +844,7 @@ set_compile_option_from_hash(rb_compile_option_t *option, VALUE opt) else if (flag == Qfalse) { (o)->mem = 0; } \ } #define SET_COMPILE_OPTION_NUM(o, h, mem) \ - { VALUE num = rb_hash_aref(opt, ID2SYM(rb_intern(#mem))); \ + { VALUE num = rb_hash_aref((h), ID2SYM(rb_intern(#mem))); \ if (!NIL_P(num)) (o)->mem = NUM2INT(num); \ } SET_COMPILE_OPTION(option, opt, inline_const_cache); @@ -732,7 +853,6 @@ set_compile_option_from_hash(rb_compile_option_t *option, VALUE opt) SET_COMPILE_OPTION(option, opt, specialized_instruction); SET_COMPILE_OPTION(option, opt, operands_unification); SET_COMPILE_OPTION(option, opt, instructions_unification); - SET_COMPILE_OPTION(option, opt, stack_caching); SET_COMPILE_OPTION(option, opt, frozen_string_literal); SET_COMPILE_OPTION(option, opt, debug_frozen_string_literal); SET_COMPILE_OPTION(option, opt, coverage_enabled); @@ -741,11 +861,13 @@ set_compile_option_from_hash(rb_compile_option_t *option, VALUE opt) #undef SET_COMPILE_OPTION_NUM } -static void -rb_iseq_make_compile_option(rb_compile_option_t *option, VALUE opt) +static rb_compile_option_t * +set_compile_option_from_ast(rb_compile_option_t *option, const rb_ast_body_t *ast) { - Check_Type(opt, T_HASH); - set_compile_option_from_hash(option, opt); + if (ast->frozen_string_literal >= 0) { + option->frozen_string_literal = ast->frozen_string_literal; + } + return option; } static void @@ -786,43 +908,36 @@ make_compile_option_value(rb_compile_option_t *option) SET_COMPILE_OPTION(option, opt, specialized_instruction); SET_COMPILE_OPTION(option, opt, operands_unification); SET_COMPILE_OPTION(option, opt, instructions_unification); - SET_COMPILE_OPTION(option, opt, stack_caching); - SET_COMPILE_OPTION(option, opt, frozen_string_literal); SET_COMPILE_OPTION(option, opt, debug_frozen_string_literal); SET_COMPILE_OPTION(option, opt, coverage_enabled); SET_COMPILE_OPTION_NUM(option, opt, debug_level); } #undef SET_COMPILE_OPTION #undef SET_COMPILE_OPTION_NUM + VALUE frozen_string_literal = option->frozen_string_literal == -1 ? Qnil : RBOOL(option->frozen_string_literal); + rb_hash_aset(opt, ID2SYM(rb_intern("frozen_string_literal")), frozen_string_literal); return opt; } rb_iseq_t * -rb_iseq_new(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath, +rb_iseq_new(const VALUE ast_value, VALUE name, VALUE path, VALUE realpath, const rb_iseq_t *parent, enum rb_iseq_type type) { - return rb_iseq_new_with_opt(ast, name, path, realpath, 0, parent, - 0, type, &COMPILE_OPTION_DEFAULT); + return rb_iseq_new_with_opt(ast_value, name, path, realpath, 0, parent, + 0, type, &COMPILE_OPTION_DEFAULT, + Qnil); } static int -ast_line_count(const rb_ast_body_t *ast) +ast_line_count(const VALUE ast_value) { - if (ast->script_lines == Qfalse) { - // this occurs when failed to parse the source code with a syntax error - return 0; - } - if (RB_TYPE_P(ast->script_lines, T_ARRAY)){ - return (int)RARRAY_LEN(ast->script_lines); - } - return FIX2INT(ast->script_lines); + rb_ast_t *ast = rb_ruby_ast_data_get(ast_value); + return ast->body.line_count; } static VALUE -iseq_setup_coverage(VALUE coverages, VALUE path, const rb_ast_body_t *ast, int line_offset) +iseq_setup_coverage(VALUE coverages, VALUE path, int line_count) { - int line_count = line_offset + ast_line_count(ast); - if (line_count >= 0) { int len = (rb_get_coverage_mode() & COVERAGE_TARGET_ONESHOT_LINES) ? 0 : line_count; @@ -836,46 +951,95 @@ iseq_setup_coverage(VALUE coverages, VALUE path, const rb_ast_body_t *ast, int l } static inline void -iseq_new_setup_coverage(VALUE path, const rb_ast_body_t *ast, int line_offset) +iseq_new_setup_coverage(VALUE path, int line_count) { VALUE coverages = rb_get_coverages(); if (RTEST(coverages)) { - iseq_setup_coverage(coverages, path, ast, line_offset); + iseq_setup_coverage(coverages, path, line_count); } } rb_iseq_t * -rb_iseq_new_top(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath, const rb_iseq_t *parent) +rb_iseq_new_top(const VALUE ast_value, VALUE name, VALUE path, VALUE realpath, const rb_iseq_t *parent) +{ + iseq_new_setup_coverage(path, ast_line_count(ast_value)); + + return rb_iseq_new_with_opt(ast_value, name, path, realpath, 0, parent, 0, + ISEQ_TYPE_TOP, &COMPILE_OPTION_DEFAULT, + Qnil); +} + +/** + * The main entry-point into the prism compiler when a file is required. + */ +rb_iseq_t * +pm_iseq_new_top(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpath, const rb_iseq_t *parent, int *error_state) +{ + iseq_new_setup_coverage(path, (int) (pm_parser_line_offsets(node->parser)->size - 1)); + + return pm_iseq_new_with_opt(node, name, path, realpath, 0, parent, 0, + ISEQ_TYPE_TOP, &COMPILE_OPTION_DEFAULT, error_state); +} + +rb_iseq_t * +rb_iseq_new_main(const VALUE ast_value, VALUE path, VALUE realpath, const rb_iseq_t *parent, int opt) { - iseq_new_setup_coverage(path, ast, 0); + iseq_new_setup_coverage(path, ast_line_count(ast_value)); - return rb_iseq_new_with_opt(ast, name, path, realpath, 0, parent, 0, - ISEQ_TYPE_TOP, &COMPILE_OPTION_DEFAULT); + return rb_iseq_new_with_opt(ast_value, rb_fstring_lit("<main>"), + path, realpath, 0, + parent, 0, ISEQ_TYPE_MAIN, opt ? &COMPILE_OPTION_DEFAULT : &COMPILE_OPTION_FALSE, + Qnil); } +/** + * The main entry-point into the prism compiler when a file is executed as the + * main file in the program. + */ rb_iseq_t * -rb_iseq_new_main(const rb_ast_body_t *ast, VALUE path, VALUE realpath, const rb_iseq_t *parent, int opt) +pm_iseq_new_main(pm_scope_node_t *node, VALUE path, VALUE realpath, const rb_iseq_t *parent, int opt, int *error_state) { - iseq_new_setup_coverage(path, ast, 0); + iseq_new_setup_coverage(path, (int) (pm_parser_line_offsets(node->parser)->size - 1)); - return rb_iseq_new_with_opt(ast, rb_fstring_lit("<main>"), + return pm_iseq_new_with_opt(node, rb_fstring_lit("<main>"), path, realpath, 0, - parent, 0, ISEQ_TYPE_MAIN, opt ? &COMPILE_OPTION_DEFAULT : &COMPILE_OPTION_FALSE); + parent, 0, ISEQ_TYPE_MAIN, opt ? &COMPILE_OPTION_DEFAULT : &COMPILE_OPTION_FALSE, error_state); } rb_iseq_t * -rb_iseq_new_eval(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_iseq_t *parent, int isolated_depth) +rb_iseq_new_eval(const VALUE ast_value, VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_iseq_t *parent, int isolated_depth) { if (rb_get_coverage_mode() & COVERAGE_TARGET_EVAL) { VALUE coverages = rb_get_coverages(); if (RTEST(coverages) && RTEST(path) && !RTEST(rb_hash_has_key(coverages, path))) { - iseq_setup_coverage(coverages, path, ast, first_lineno - 1); + iseq_setup_coverage(coverages, path, ast_line_count(ast_value) + first_lineno - 1); } } - return rb_iseq_new_with_opt(ast, name, path, realpath, first_lineno, - parent, isolated_depth, ISEQ_TYPE_EVAL, &COMPILE_OPTION_DEFAULT); + rb_compile_option_t option = COMPILE_OPTION_DEFAULT; + rb_ast_t *ast = rb_ruby_ast_data_get(ast_value); + if (ast->body.coverage_enabled >= 0) { + option.coverage_enabled = ast->body.coverage_enabled; + } + return rb_iseq_new_with_opt(ast_value, name, path, realpath, first_lineno, + parent, isolated_depth, ISEQ_TYPE_EVAL, &option, + Qnil); +} + +rb_iseq_t * +pm_iseq_new_eval(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpath, + int first_lineno, const rb_iseq_t *parent, int isolated_depth, int *error_state) +{ + if (rb_get_coverage_mode() & COVERAGE_TARGET_EVAL) { + VALUE coverages = rb_get_coverages(); + if (RTEST(coverages) && RTEST(path) && !RTEST(rb_hash_has_key(coverages, path))) { + iseq_setup_coverage(coverages, path, ((int) (pm_parser_line_offsets(node->parser)->size - 1)) + first_lineno - 1); + } + } + + return pm_iseq_new_with_opt(node, name, path, realpath, first_lineno, + parent, isolated_depth, ISEQ_TYPE_EVAL, &COMPILE_OPTION_DEFAULT, error_state); } static inline rb_iseq_t * @@ -893,41 +1057,134 @@ iseq_translate(rb_iseq_t *iseq) } rb_iseq_t * -rb_iseq_new_with_opt(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath, +rb_iseq_new_with_opt(VALUE ast_value, VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_iseq_t *parent, int isolated_depth, - enum rb_iseq_type type, const rb_compile_option_t *option) + enum rb_iseq_type type, const rb_compile_option_t *option, + VALUE script_lines) { - const NODE *node = ast ? ast->root : 0; + rb_ast_t *ast = rb_ruby_ast_data_get(ast_value); + rb_ast_body_t *body = ast ? &ast->body : NULL; + const NODE *node = body ? body->root : 0; /* TODO: argument check */ rb_iseq_t *iseq = iseq_alloc(); rb_compile_option_t new_opt; - if (option) { + if (!option) option = &COMPILE_OPTION_DEFAULT; + if (body) { new_opt = *option; + option = set_compile_option_from_ast(&new_opt, body); } - else { - new_opt = COMPILE_OPTION_DEFAULT; - } - if (ast && ast->compile_option) rb_iseq_make_compile_option(&new_opt, ast->compile_option); - VALUE script_lines = Qnil; - - if (ast && !FIXNUM_P(ast->script_lines) && ast->script_lines) { - script_lines = ast->script_lines; + if (!NIL_P(script_lines)) { + // noop + } + else if (body && body->script_lines) { + script_lines = rb_parser_build_script_lines_from(body->script_lines); } else if (parent) { script_lines = ISEQ_BODY(parent)->variable.script_lines; } - prepare_iseq_build(iseq, name, path, realpath, first_lineno, node ? &node->nd_loc : NULL, node ? nd_node_id(node) : -1, - parent, isolated_depth, type, script_lines, &new_opt); + prepare_iseq_build(iseq, name, path, realpath, first_lineno, node ? &node->nd_loc : NULL, prepare_node_id(node), + parent, isolated_depth, type, script_lines, option); rb_iseq_compile_node(iseq, node); finish_iseq_build(iseq); + RB_GC_GUARD(ast_value); + + return iseq_translate(iseq); +} + +/** + * Core implementation for building a prism iseq. This does not use rb_protect, + * so any exceptions (e.g. from finish_iseq_build) propagate normally up the + * call stack — matching the parse.y compiler's behavior. + */ +rb_iseq_t * +pm_iseq_build(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpath, + int first_lineno, const rb_iseq_t *parent, int isolated_depth, + enum rb_iseq_type type, const rb_compile_option_t *option) +{ + rb_iseq_t *iseq = iseq_alloc(); + ISEQ_BODY(iseq)->prism = true; + + rb_compile_option_t next_option; + if (!option) option = &COMPILE_OPTION_DEFAULT; + + next_option = *option; + next_option.coverage_enabled = node->coverage_enabled < 0 ? 0 : node->coverage_enabled > 0; + option = &next_option; + + pm_location_t *location = &node->base.location; + int32_t start_line = pm_parser_start_line(node->parser); + const pm_line_offset_list_t *line_offsets = pm_parser_line_offsets(node->parser); + + pm_line_column_t start = pm_line_offset_list_line_column(line_offsets, location->start, start_line); + pm_line_column_t end = pm_line_offset_list_line_column(line_offsets, location->start + location->length, start_line); + + rb_code_location_t code_location = (rb_code_location_t) { + .beg_pos = { .lineno = (int) start.line, .column = (int) start.column }, + .end_pos = { .lineno = (int) end.line, .column = (int) end.column } + }; + + prepare_iseq_build(iseq, name, path, realpath, first_lineno, &code_location, node->ast_node->node_id, + parent, isolated_depth, type, node->script_lines == NULL ? Qnil : *node->script_lines, option); + + pm_iseq_compile_node(iseq, node); + finish_iseq_build(iseq); return iseq_translate(iseq); } +struct pm_iseq_new_with_opt_data { + rb_iseq_t *iseq; + pm_scope_node_t *node; + VALUE name, path, realpath; + int first_lineno, isolated_depth; + const rb_iseq_t *parent; + enum rb_iseq_type type; + const rb_compile_option_t *option; +}; + +static VALUE +pm_iseq_new_with_opt_try(VALUE d) +{ + struct pm_iseq_new_with_opt_data *data = (struct pm_iseq_new_with_opt_data *)d; + data->iseq = pm_iseq_build(data->node, data->name, data->path, data->realpath, + data->first_lineno, data->parent, data->isolated_depth, + data->type, data->option); + return Qundef; +} + +/** + * This is a step in the prism compiler that is called once all of the various + * options have been established. It is called from one of the pm_iseq_new_* + * functions or from the RubyVM::InstructionSequence APIs. + * + * This function uses rb_protect to catch exceptions, storing the error state + * in the provided out parameter. This is only needed at top-level entry points + * where the caller wants to handle errors gracefully. Child iseqs compiled + * during the compilation process do NOT go through this function — they use + * pm_iseq_build directly, letting exceptions propagate naturally (matching + * the parse.y compiler's behavior). + */ +rb_iseq_t * +pm_iseq_new_with_opt(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpath, + int first_lineno, const rb_iseq_t *parent, int isolated_depth, + enum rb_iseq_type type, const rb_compile_option_t *option, int *error_state) +{ + struct pm_iseq_new_with_opt_data data = { + .node = node, .name = name, .path = path, .realpath = realpath, + .first_lineno = first_lineno, .parent = parent, + .isolated_depth = isolated_depth, .type = type, .option = option + }; + rb_protect(pm_iseq_new_with_opt_try, (VALUE)&data, error_state); + + if (*error_state) return NULL; + + return data.iseq; +} + rb_iseq_t * rb_iseq_new_with_callback( const struct rb_iseq_new_with_callback_callback_func * ifunc, @@ -953,7 +1210,22 @@ rb_iseq_load_iseq(VALUE fname) VALUE iseqv = rb_check_funcall(rb_cISeq, rb_intern("load_iseq"), 1, &fname); if (!SPECIAL_CONST_P(iseqv) && RBASIC_CLASS(iseqv) == rb_cISeq) { - return iseqw_check(iseqv); + return iseqw_check(iseqv); + } + + return NULL; +} + +const rb_iseq_t * +rb_iseq_compile_iseq(VALUE str, VALUE fname) +{ + VALUE args[] = { + str, fname + }; + VALUE iseqv = rb_check_funcall(rb_cISeq, rb_intern("compile"), 2, args); + + if (!SPECIAL_CONST_P(iseqv) && RBASIC_CLASS(iseqv) == rb_cISeq) { + return iseqw_check(iseqv); } return NULL; @@ -1049,6 +1321,10 @@ iseq_load(VALUE data, const rb_iseq_t *parent, VALUE opt) tmp_loc.end_pos.column = NUM2INT(rb_ary_entry(code_location, 3)); } + if (SYM2ID(rb_hash_aref(misc, ID2SYM(rb_intern("parser")))) == rb_intern("prism")) { + ISEQ_BODY(iseq)->prism = true; + } + make_compile_option(&option, opt); option.peephole_optimization = FALSE; /* because peephole optimization can modify original iseq */ prepare_iseq_build(iseq, name, path, realpath, first_lineno, &tmp_loc, NUM2INT(node_id), @@ -1088,9 +1364,10 @@ rb_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, V #else # define INITIALIZED /* volatile */ #endif - rb_ast_t *(*parse)(VALUE vparser, VALUE fname, VALUE file, int start); + VALUE (*parse)(VALUE vparser, VALUE fname, VALUE file, int start); int ln; - rb_ast_t *INITIALIZED ast; + VALUE INITIALIZED ast_value; + rb_ast_t *ast; VALUE name = rb_fstring_lit("<compiled>"); /* safe results first */ @@ -1106,26 +1383,105 @@ rb_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, V } { const VALUE parser = rb_parser_new(); - const rb_iseq_t *outer_scope = rb_iseq_new(NULL, name, name, Qnil, 0, ISEQ_TYPE_TOP); + const rb_iseq_t *outer_scope = rb_iseq_new(Qnil, name, name, Qnil, 0, ISEQ_TYPE_TOP); VALUE outer_scope_v = (VALUE)outer_scope; rb_parser_set_context(parser, outer_scope, FALSE); + if (ruby_vm_keep_script_lines) rb_parser_set_script_lines(parser); RB_GC_GUARD(outer_scope_v); - ast = (*parse)(parser, file, src, ln); + ast_value = (*parse)(parser, file, src, ln); } - if (!ast->body.root) { + ast = rb_ruby_ast_data_get(ast_value); + + if (!ast || !ast->body.root) { rb_ast_dispose(ast); rb_exc_raise(GET_EC()->errinfo); } else { - iseq = rb_iseq_new_with_opt(&ast->body, name, file, realpath, ln, - NULL, 0, ISEQ_TYPE_TOP, &option); + iseq_new_setup_coverage(file, ast_line_count(ast_value)); + iseq = rb_iseq_new_with_opt(ast_value, name, file, realpath, ln, + NULL, 0, ISEQ_TYPE_TOP, &option, + Qnil); rb_ast_dispose(ast); } return iseq; } +static rb_iseq_t * +pm_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, VALUE opt) +{ + rb_iseq_t *iseq = NULL; + rb_compile_option_t option; + int ln; + VALUE name = rb_fstring_lit("<compiled>"); + + /* safe results first */ + make_compile_option(&option, opt); + ln = NUM2INT(line); + StringValueCStr(file); + + bool parse_file = false; + if (RB_TYPE_P(src, T_FILE)) { + parse_file = true; + src = rb_io_path(src); + } + else { + src = StringValue(src); + } + + pm_parse_result_t result; + pm_parse_result_init(&result); + pm_options_line_set(result.options, NUM2INT(line)); + pm_options_scopes_init(result.options, 1); + result.node.coverage_enabled = 1; + + switch (option.frozen_string_literal) { + case ISEQ_FROZEN_STRING_LITERAL_UNSET: + break; + case ISEQ_FROZEN_STRING_LITERAL_DISABLED: + pm_options_frozen_string_literal_set(result.options, false); + break; + case ISEQ_FROZEN_STRING_LITERAL_ENABLED: + pm_options_frozen_string_literal_set(result.options, true); + break; + default: + rb_bug("pm_iseq_compile_with_option: invalid frozen_string_literal=%d", option.frozen_string_literal); + break; + } + + VALUE script_lines; + VALUE error; + + if (parse_file) { + error = pm_load_parse_file(&result, src, ruby_vm_keep_script_lines ? &script_lines : NULL); + } + else { + error = pm_parse_string(&result, src, file, ruby_vm_keep_script_lines ? &script_lines : NULL); + } + + RB_GC_GUARD(src); + + if (error == Qnil) { + int error_state; + iseq_new_setup_coverage(file, (int) (pm_parser_line_offsets(result.node.parser)->size - 1)); + iseq = pm_iseq_new_with_opt(&result.node, name, file, realpath, ln, NULL, 0, ISEQ_TYPE_TOP, &option, &error_state); + + pm_parse_result_free(&result); + + if (error_state) { + RUBY_ASSERT(iseq == NULL); + rb_jump_tag(error_state); + } + } + else { + pm_parse_result_free(&result); + rb_exc_raise(error); + } + + return iseq; +} + VALUE rb_iseq_path(const rb_iseq_t *iseq) { @@ -1210,8 +1566,8 @@ remove_coverage_i(void *vstart, void *vend, size_t stride, void *data) { VALUE v = (VALUE)vstart; for (; v != (VALUE)vend; v += stride) { - void *ptr = asan_poisoned_object_p(v); - asan_unpoison_object(v, false); + void *ptr = rb_asan_poisoned_object_p(v); + rb_asan_unpoison_object(v, false); if (rb_obj_is_iseq(v)) { rb_iseq_t *iseq = (rb_iseq_t *)v; @@ -1232,20 +1588,25 @@ rb_iseq_remove_coverage_all(void) /* define wrapper class methods (RubyVM::InstructionSequence) */ static void -iseqw_mark(void *ptr) +iseqw_mark_and_move(void *ptr) { - rb_gc_mark((VALUE)ptr); + rb_gc_mark_and_move((VALUE *)ptr); } static size_t iseqw_memsize(const void *ptr) { - return rb_iseq_memsize((const rb_iseq_t *)ptr); + return rb_iseq_memsize(*(const rb_iseq_t **)ptr); } static const rb_data_type_t iseqw_data_type = { "T_IMEMO/iseq", - {iseqw_mark, NULL, iseqw_memsize,}, + { + iseqw_mark_and_move, + RUBY_TYPED_DEFAULT_FREE, + iseqw_memsize, + iseqw_mark_and_move, + }, 0, 0, RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED }; @@ -1253,18 +1614,20 @@ static VALUE iseqw_new(const rb_iseq_t *iseq) { if (iseq->wrapper) { + if (*(const rb_iseq_t **)rb_check_typeddata(iseq->wrapper, &iseqw_data_type) != iseq) { + rb_raise(rb_eTypeError, "wrong iseq wrapper: %" PRIsVALUE " for %p", + iseq->wrapper, (void *)iseq); + } return iseq->wrapper; } else { - union { const rb_iseq_t *in; void *out; } deconst; - VALUE obj; - deconst.in = iseq; - obj = TypedData_Wrap_Struct(rb_cISeq, &iseqw_data_type, deconst.out); - RB_OBJ_WRITTEN(obj, Qundef, iseq); + rb_iseq_t **ptr; + VALUE obj = TypedData_Make_Struct(rb_cISeq, rb_iseq_t *, &iseqw_data_type, ptr); + RB_OBJ_WRITE(obj, ptr, iseq); /* cache a wrapper object */ + RB_OBJ_SET_FROZEN_SHAREABLE((VALUE)obj); RB_OBJ_WRITE((VALUE)iseq, &iseq->wrapper, obj); - RB_OBJ_FREEZE((VALUE)iseq); return obj; } @@ -1276,19 +1639,57 @@ rb_iseqw_new(const rb_iseq_t *iseq) return iseqw_new(iseq); } +/** + * Accept the options given to InstructionSequence.compile and + * InstructionSequence.compile_prism and share the logic for creating the + * instruction sequence. + */ +static VALUE +iseqw_s_compile_parser(int argc, VALUE *argv, VALUE self, bool prism) +{ + VALUE src, file = Qnil, path = Qnil, line = Qnil, opt = Qnil; + int i; + + i = rb_scan_args(argc, argv, "1*:", &src, NULL, &opt); + if (i > 4+NIL_P(opt)) rb_error_arity(argc, 1, 5); + switch (i) { + case 5: opt = argv[--i]; + case 4: line = argv[--i]; + case 3: path = argv[--i]; + case 2: file = argv[--i]; + } + + if (NIL_P(file)) file = rb_fstring_lit("<compiled>"); + if (NIL_P(path)) path = file; + if (NIL_P(line)) line = INT2FIX(1); + + Check_Type(path, T_STRING); + Check_Type(file, T_STRING); + + rb_iseq_t *iseq; + if (prism) { + iseq = pm_iseq_compile_with_option(src, file, path, line, opt); + } + else { + iseq = rb_iseq_compile_with_option(src, file, path, line, opt); + } + + return iseqw_new(iseq); +} + /* * call-seq: * InstructionSequence.compile(source[, file[, path[, line[, options]]]]) -> iseq * InstructionSequence.new(source[, file[, path[, line[, options]]]]) -> iseq * - * Takes +source+, a String of Ruby code and compiles it to an - * InstructionSequence. + * Takes +source+, which can be a string of Ruby code, or an open +File+ object. + * that contains Ruby source code. * * Optionally takes +file+, +path+, and +line+ which describe the file path, * real path and first line number of the ruby code in +source+ which are * metadata attached to the returned +iseq+. * - * +file+ is used for `__FILE__` and exception backtrace. +path+ is used for + * +file+ is used for +__FILE__+ and exception backtrace. +path+ is used for * +require_relative+ base. It is recommended these should be the same full * path. * @@ -1304,6 +1705,10 @@ rb_iseqw_new(const rb_iseq_t *iseq) * RubyVM::InstructionSequence.compile(File.read(path), path, File.expand_path(path)) * #=> <RubyVM::InstructionSequence:<compiled>@test.rb:1> * + * file = File.open("test.rb") + * RubyVM::InstructionSequence.compile(file) + * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1> + * * path = File.expand_path("test.rb") * RubyVM::InstructionSequence.compile(File.read(path), path, path) * #=> <RubyVM::InstructionSequence:<compiled>@/absolute/path/to/test.rb:1> @@ -1312,28 +1717,95 @@ rb_iseqw_new(const rb_iseq_t *iseq) static VALUE iseqw_s_compile(int argc, VALUE *argv, VALUE self) { - VALUE src, file = Qnil, path = Qnil, line = INT2FIX(1), opt = Qnil; - int i; - - i = rb_scan_args(argc, argv, "1*:", &src, NULL, &opt); - if (i > 4+NIL_P(opt)) rb_error_arity(argc, 1, 5); - switch (i) { - case 5: opt = argv[--i]; - case 4: line = argv[--i]; - case 3: path = argv[--i]; - case 2: file = argv[--i]; - } - - if (NIL_P(file)) file = rb_fstring_lit("<compiled>"); - if (NIL_P(path)) path = file; - if (NIL_P(line)) line = INT2FIX(1); + return iseqw_s_compile_parser(argc, argv, self, rb_ruby_prism_p()); +} - Check_Type(path, T_STRING); - Check_Type(file, T_STRING); +/* + * call-seq: + * InstructionSequence.compile_parsey(source[, file[, path[, line[, options]]]]) -> iseq + * + * Takes +source+, which can be a string of Ruby code, or an open +File+ object. + * that contains Ruby source code. It parses and compiles using parse.y. + * + * Optionally takes +file+, +path+, and +line+ which describe the file path, + * real path and first line number of the ruby code in +source+ which are + * metadata attached to the returned +iseq+. + * + * +file+ is used for +__FILE__+ and exception backtrace. +path+ is used for + * +require_relative+ base. It is recommended these should be the same full + * path. + * + * +options+, which can be +true+, +false+ or a +Hash+, is used to + * modify the default behavior of the Ruby iseq compiler. + * + * For details regarding valid compile options see ::compile_option=. + * + * RubyVM::InstructionSequence.compile_parsey("a = 1 + 2") + * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>> + * + * path = "test.rb" + * RubyVM::InstructionSequence.compile_parsey(File.read(path), path, File.expand_path(path)) + * #=> <RubyVM::InstructionSequence:<compiled>@test.rb:1> + * + * file = File.open("test.rb") + * RubyVM::InstructionSequence.compile_parsey(file) + * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1> + * + * path = File.expand_path("test.rb") + * RubyVM::InstructionSequence.compile_parsey(File.read(path), path, path) + * #=> <RubyVM::InstructionSequence:<compiled>@/absolute/path/to/test.rb:1> + * + */ +static VALUE +iseqw_s_compile_parsey(int argc, VALUE *argv, VALUE self) +{ + return iseqw_s_compile_parser(argc, argv, self, false); +} - return iseqw_new(rb_iseq_compile_with_option(src, file, path, line, opt)); +/* + * call-seq: + * InstructionSequence.compile_prism(source[, file[, path[, line[, options]]]]) -> iseq + * + * Takes +source+, which can be a string of Ruby code, or an open +File+ object. + * that contains Ruby source code. It parses and compiles using prism. + * + * Optionally takes +file+, +path+, and +line+ which describe the file path, + * real path and first line number of the ruby code in +source+ which are + * metadata attached to the returned +iseq+. + * + * +file+ is used for +__FILE__+ and exception backtrace. +path+ is used for + * +require_relative+ base. It is recommended these should be the same full + * path. + * + * +options+, which can be +true+, +false+ or a +Hash+, is used to + * modify the default behavior of the Ruby iseq compiler. + * + * For details regarding valid compile options see ::compile_option=. + * + * RubyVM::InstructionSequence.compile_prism("a = 1 + 2") + * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>> + * + * path = "test.rb" + * RubyVM::InstructionSequence.compile_prism(File.read(path), path, File.expand_path(path)) + * #=> <RubyVM::InstructionSequence:<compiled>@test.rb:1> + * + * file = File.open("test.rb") + * RubyVM::InstructionSequence.compile_prism(file) + * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1> + * + * path = File.expand_path("test.rb") + * RubyVM::InstructionSequence.compile_prism(File.read(path), path, path) + * #=> <RubyVM::InstructionSequence:<compiled>@/absolute/path/to/test.rb:1> + * + */ +static VALUE +iseqw_s_compile_prism(int argc, VALUE *argv, VALUE self) +{ + return iseqw_s_compile_parser(argc, argv, self, true); } +static VALUE iseqw_s_compile_file_prism(int argc, VALUE *argv, VALUE self); + /* * call-seq: * InstructionSequence.compile_file(file[, options]) -> iseq @@ -1357,9 +1829,14 @@ iseqw_s_compile(int argc, VALUE *argv, VALUE self) static VALUE iseqw_s_compile_file(int argc, VALUE *argv, VALUE self) { + if (rb_ruby_prism_p()) { + return iseqw_s_compile_file_prism(argc, argv, self); + } + VALUE file, opt = Qnil; VALUE parser, f, exc = Qnil, ret; rb_ast_t *ast; + VALUE ast_value; rb_compile_option_t option; int i; @@ -1378,7 +1855,9 @@ iseqw_s_compile_file(int argc, VALUE *argv, VALUE self) parser = rb_parser_new(); rb_parser_set_context(parser, NULL, FALSE); - ast = (rb_ast_t *)rb_parser_load_file(parser, file); + ast_value = rb_parser_load_file(parser, file); + iseq_new_setup_coverage(file, ast_line_count(ast_value)); + ast = rb_ruby_ast_data_get(ast_value); if (!ast->body.root) exc = GET_EC()->errinfo; rb_io_close(f); @@ -1389,11 +1868,13 @@ iseqw_s_compile_file(int argc, VALUE *argv, VALUE self) make_compile_option(&option, opt); - ret = iseqw_new(rb_iseq_new_with_opt(&ast->body, rb_fstring_lit("<main>"), + ret = iseqw_new(rb_iseq_new_with_opt(ast_value, rb_fstring_lit("<main>"), file, rb_realpath_internal(Qnil, file, 1), - 1, NULL, 0, ISEQ_TYPE_TOP, &option)); + 1, NULL, 0, ISEQ_TYPE_TOP, &option, + Qnil)); rb_ast_dispose(ast); + RB_GC_GUARD(ast_value); rb_vm_pop_frame(ec); RB_GC_GUARD(v); @@ -1402,6 +1883,96 @@ iseqw_s_compile_file(int argc, VALUE *argv, VALUE self) /* * call-seq: + * InstructionSequence.compile_file_prism(file[, options]) -> iseq + * + * Takes +file+, a String with the location of a Ruby source file, reads, + * parses and compiles the file, and returns +iseq+, the compiled + * InstructionSequence with source location metadata set. It parses and + * compiles using prism. + * + * Optionally takes +options+, which can be +true+, +false+ or a +Hash+, to + * modify the default behavior of the Ruby iseq compiler. + * + * For details regarding valid compile options see ::compile_option=. + * + * # /tmp/hello.rb + * puts "Hello, world!" + * + * # elsewhere + * RubyVM::InstructionSequence.compile_file_prism("/tmp/hello.rb") + * #=> <RubyVM::InstructionSequence:<main>@/tmp/hello.rb> + */ +static VALUE +iseqw_s_compile_file_prism(int argc, VALUE *argv, VALUE self) +{ + VALUE file, opt = Qnil, ret; + rb_compile_option_t option; + int i; + + i = rb_scan_args(argc, argv, "1*:", &file, NULL, &opt); + if (i > 1+NIL_P(opt)) rb_error_arity(argc, 1, 2); + switch (i) { + case 2: opt = argv[--i]; + } + FilePathValue(file); + file = rb_fstring(file); /* rb_io_t->pathv gets frozen anyways */ + + rb_execution_context_t *ec = GET_EC(); + VALUE v = rb_vm_push_frame_fname(ec, file); + + make_compile_option(&option, opt); + + pm_parse_result_t result; + pm_parse_result_init(&result); + result.node.coverage_enabled = 1; + + switch (option.frozen_string_literal) { + case ISEQ_FROZEN_STRING_LITERAL_UNSET: + break; + case ISEQ_FROZEN_STRING_LITERAL_DISABLED: + pm_options_frozen_string_literal_set(result.options, false); + break; + case ISEQ_FROZEN_STRING_LITERAL_ENABLED: + pm_options_frozen_string_literal_set(result.options, true); + break; + default: + rb_bug("iseqw_s_compile_file_prism: invalid frozen_string_literal=%d", option.frozen_string_literal); + break; + } + + VALUE script_lines; + VALUE error = pm_load_parse_file(&result, file, ruby_vm_keep_script_lines ? &script_lines : NULL); + + if (error == Qnil) { + int error_state; + iseq_new_setup_coverage(file, (int) (pm_parser_line_offsets(result.node.parser)->size - 1)); + rb_iseq_t *iseq = pm_iseq_new_with_opt(&result.node, rb_fstring_lit("<main>"), + file, + rb_realpath_internal(Qnil, file, 1), + 1, NULL, 0, ISEQ_TYPE_TOP, &option, &error_state); + + pm_parse_result_free(&result); + + if (error_state) { + RUBY_ASSERT(iseq == NULL); + rb_jump_tag(error_state); + } + + ret = iseqw_new(iseq); + rb_vm_pop_frame(ec); + RB_GC_GUARD(v); + return ret; + } + else { + pm_parse_result_free(&result); + rb_vm_pop_frame(ec); + RB_GC_GUARD(v); + rb_exc_raise(error); + } +} + +/* + * call-seq: * InstructionSequence.compile_option = options * * Sets the default values for various optimizations in the Ruby iseq @@ -1422,7 +1993,6 @@ iseqw_s_compile_file(int argc, VALUE *argv, VALUE self) * * +:operands_unification+ * * +:peephole_optimization+ * * +:specialized_instruction+ - * * +:stack_caching+ * * +:tailcall_optimization+ * * Additionally, +:debug_level+ can be set to an integer. @@ -1457,7 +2027,9 @@ iseqw_s_compile_option_get(VALUE self) static const rb_iseq_t * iseqw_check(VALUE iseqw) { - rb_iseq_t *iseq = DATA_PTR(iseqw); + rb_iseq_t **iseq_ptr; + TypedData_Get_Struct(iseqw, rb_iseq_t *, &iseqw_data_type, iseq_ptr); + rb_iseq_t *iseq = *iseq_ptr; if (!ISEQ_BODY(iseq)) { rb_ibf_load_iseq_complete(iseq); @@ -1486,7 +2058,11 @@ rb_iseqw_to_iseq(VALUE iseqw) static VALUE iseqw_eval(VALUE self) { - return rb_iseq_eval(iseqw_check(self)); + const rb_iseq_t *iseq = iseqw_check(self); + if (0 == ISEQ_BODY(iseq)->iseq_size) { + rb_raise(rb_eTypeError, "attempt to evaluate dummy InstructionSequence"); + } + return rb_iseq_eval(iseq, rb_current_box()); } /* @@ -1920,15 +2496,22 @@ rb_iseq_event_flags(const rb_iseq_t *iseq, size_t pos) } } +static void rb_iseq_trace_flag_cleared(const rb_iseq_t *iseq, size_t pos); + +// Clear tracing event flags and turn off tracing for a given instruction as needed. +// This is currently used after updating a one-shot line coverage for the current instruction. void rb_iseq_clear_event_flags(const rb_iseq_t *iseq, size_t pos, rb_event_flag_t reset) { - struct iseq_insn_info_entry *entry = (struct iseq_insn_info_entry *)get_insn_info(iseq, pos); - if (entry) { - entry->events &= ~reset; - if (!(entry->events & iseq->aux.exec.global_trace_events)) { - void rb_iseq_trace_flag_cleared(const rb_iseq_t *iseq, size_t pos); - rb_iseq_trace_flag_cleared(iseq, pos); + RB_VM_LOCKING() { + rb_vm_barrier(); + + struct iseq_insn_info_entry *entry = (struct iseq_insn_info_entry *)get_insn_info(iseq, pos); + if (entry) { + entry->events &= ~reset; + if (!(entry->events & iseq->aux.exec.global_trace_events)) { + rb_iseq_trace_flag_cleared(iseq, pos); + } } } } @@ -1950,7 +2533,7 @@ local_var_name(const rb_iseq_t *diseq, VALUE level, VALUE op) if (!name) { name = rb_str_new_cstr("?"); } - else if (!rb_str_symname_p(name)) { + else if (!rb_is_local_id(lid)) { name = rb_str_inspect(name); } else { @@ -2100,17 +2683,18 @@ rb_insn_operand_intern(const rb_iseq_t *iseq, VALUE flags = rb_ary_new(); # define CALL_FLAG(n) if (vm_ci_flag(ci) & VM_CALL_##n) rb_ary_push(flags, rb_str_new2(#n)) CALL_FLAG(ARGS_SPLAT); + CALL_FLAG(ARGS_SPLAT_MUT); CALL_FLAG(ARGS_BLOCKARG); CALL_FLAG(FCALL); CALL_FLAG(VCALL); CALL_FLAG(ARGS_SIMPLE); - CALL_FLAG(BLOCKISEQ); CALL_FLAG(TAILCALL); CALL_FLAG(SUPER); CALL_FLAG(ZSUPER); CALL_FLAG(KWARG); CALL_FLAG(KW_SPLAT); CALL_FLAG(KW_SPLAT_MUT); + CALL_FLAG(FORWARDING); CALL_FLAG(OPT_SEND); /* maybe not reachable */ rb_ary_push(ary, rb_ary_join(flags, rb_str_new2("|"))); } @@ -2208,7 +2792,7 @@ rb_iseq_disasm_insn(VALUE ret, const VALUE *code, size_t pos, { rb_event_flag_t events = rb_iseq_event_flags(iseq, pos); if (events) { - str = rb_str_catf(str, "[%s%s%s%s%s%s%s%s%s%s%s]", + str = rb_str_catf(str, "[%s%s%s%s%s%s%s%s%s%s%s%s]", events & RUBY_EVENT_LINE ? "Li" : "", events & RUBY_EVENT_CLASS ? "Cl" : "", events & RUBY_EVENT_END ? "En" : "", @@ -2218,6 +2802,7 @@ rb_iseq_disasm_insn(VALUE ret, const VALUE *code, size_t pos, events & RUBY_EVENT_C_RETURN ? "Cr" : "", events & RUBY_EVENT_B_CALL ? "Bc" : "", events & RUBY_EVENT_B_RETURN ? "Br" : "", + events & RUBY_EVENT_RESCUE ? "Rs" : "", events & RUBY_EVENT_COVERAGE_LINE ? "Cli" : "", events & RUBY_EVENT_COVERAGE_BRANCH ? "Cbr" : ""); } @@ -2307,11 +2892,20 @@ rb_iseq_disasm_recursive(const rb_iseq_t *iseq, VALUE indent) rb_str_cat2(str, "== disasm: "); rb_str_append(str, iseq_inspect(iseq)); - rb_str_catf(str, " (catch: %s)", body->catch_except_p ? "true" : "false"); if ((l = RSTRING_LEN(str) - indent_len) < header_minlen) { rb_str_modify_expand(str, header_minlen - l); memset(RSTRING_END(str), '=', header_minlen - l); } + if (iseq->body->builtin_attrs) { +#define disasm_builtin_attr(str, iseq, attr) \ + if (iseq->body->builtin_attrs & BUILTIN_ATTR_ ## attr) { \ + rb_str_cat2(str, " " #attr); \ + } + disasm_builtin_attr(str, iseq, LEAF); + disasm_builtin_attr(str, iseq, SINGLE_NOARG_LEAF); + disasm_builtin_attr(str, iseq, INLINE_BLOCK); + disasm_builtin_attr(str, iseq, C_TRACE); + } rb_str_cat2(str, "\n"); /* show catch table information */ @@ -2384,11 +2978,11 @@ rb_iseq_disasm_recursive(const rb_iseq_t *iseq, VALUE indent) } snprintf(argi, sizeof(argi), "%s%s%s%s%s%s", /* arg, opts, rest, post, kwrest, block */ - body->param.lead_num > li ? "Arg" : "", + (body->param.lead_num > li) ? (body->param.flags.ambiguous_param0 ? "AmbiguousArg" : "Arg") : "", opti, - (body->param.flags.has_rest && body->param.rest_start == li) ? "Rest" : "", + (body->param.flags.has_rest && body->param.rest_start == li) ? (body->param.flags.anon_rest ? "AnonRest" : "Rest") : "", (body->param.flags.has_post && body->param.post_start <= li && li < body->param.post_start + body->param.post_num) ? "Post" : "", - (body->param.flags.has_kwrest && keyword->rest_start == li) ? "Kwrest" : "", + (body->param.flags.has_kwrest && keyword->rest_start == li) ? (body->param.flags.anon_kwrest ? "AnonKwrest" : "Kwrest") : "", (body->param.flags.has_block && body->param.block_start == li) ? "Block" : ""); rb_str_cat(str, indent_str, indent_len); @@ -2436,24 +3030,31 @@ rb_iseq_disasm(const rb_iseq_t *iseq) attr_index_t rb_estimate_iv_count(VALUE klass, const rb_iseq_t * initialize_iseq) { - struct rb_id_table * iv_names = rb_id_table_create(0); + set_table iv_names = { 0 }; + set_init_embedded_numtable_with_size(&iv_names, 0); for (unsigned int i = 0; i < ISEQ_BODY(initialize_iseq)->ivc_size; i++) { IVC cache = (IVC)&ISEQ_BODY(initialize_iseq)->is_entries[i]; if (cache->iv_set_name) { - rb_id_table_insert(iv_names, cache->iv_set_name, Qtrue); + set_insert(&iv_names, cache->iv_set_name); } } - attr_index_t count = (attr_index_t)rb_id_table_size(iv_names); + size_t count = iv_names.num_entries; VALUE superclass = rb_class_superclass(klass); - count += RCLASS_EXT(superclass)->max_iv_count; + if (!NIL_P(superclass)) { // BasicObject doesn't have a superclass + count += RCLASS_MAX_IV_COUNT(superclass); + } - rb_id_table_free(iv_names); + set_free_embedded_table(&iv_names); - return count; + if (count > (attr_index_t)-1) { + return (attr_index_t)-1; + } + + return (attr_index_t)count; } /* @@ -2563,6 +3164,7 @@ push_event_info(const rb_iseq_t *iseq, rb_event_flag_t events, int line, VALUE a C(RUBY_EVENT_END, "end", INT2FIX(line)); C(RUBY_EVENT_RETURN, "return", INT2FIX(line)); C(RUBY_EVENT_B_RETURN, "b_return", INT2FIX(line)); + C(RUBY_EVENT_RESCUE, "rescue", INT2FIX(line)); #undef C } @@ -2630,7 +3232,10 @@ iseqw_s_of(VALUE klass, VALUE body) { const rb_iseq_t *iseq = NULL; - if (rb_obj_is_proc(body)) { + if (rb_frame_info_p(body)) { + iseq = rb_get_iseq_from_frame_info(body); + } + else if (rb_obj_is_proc(body)) { iseq = vm_proc_iseq(body); if (!rb_obj_is_iseq((VALUE)iseq)) { @@ -2652,10 +3257,10 @@ iseqw_s_of(VALUE klass, VALUE body) * InstructionSequence.disasm(body) -> str * InstructionSequence.disassemble(body) -> str * - * Takes +body+, a Method or Proc object, and returns a String with the - * human readable instructions for +body+. + * Takes +body+, a +Method+ or +Proc+ object, and returns a +String+ + * with the human readable instructions for +body+. * - * For a Method object: + * For a +Method+ object: * * # /tmp/method.rb * def hello @@ -2670,12 +3275,12 @@ iseqw_s_of(VALUE klass, VALUE body) * 0000 trace 8 ( 1) * 0002 trace 1 ( 2) * 0004 putself - * 0005 putstring "hello, world" + * 0005 dupstring "hello, world" * 0007 send :puts, 1, nil, 8, <ic:0> * 0013 trace 16 ( 3) * 0015 leave ( 2) * - * For a Proc: + * For a +Proc+ object: * * # /tmp/proc.rb * p = proc { num = 1 + 2 } @@ -2706,17 +3311,6 @@ iseqw_s_disasm(VALUE klass, VALUE body) return NIL_P(iseqw) ? Qnil : rb_iseq_disasm(iseqw_check(iseqw)); } -const char * -ruby_node_name(int node) -{ - switch (node) { -#include "node_name.inc" - default: - rb_bug("unknown node: %d", node); - return 0; - } -} - static VALUE register_label(struct st_table *table, unsigned long idx) { @@ -2746,7 +3340,7 @@ static int cdhash_each(VALUE key, VALUE value, VALUE ary) { rb_ary_push(ary, obj_resurrect(key)); - rb_ary_push(ary, value); + rb_ary_push(ary, INT2FIX(value)); return ST_CONTINUE; } @@ -2805,6 +3399,7 @@ iseq_type_id(enum rb_iseq_type type) static VALUE iseq_data_to_ary(const rb_iseq_t *iseq) { + VALUE iseq_value = (VALUE)iseq; unsigned int i; long l; const struct rb_iseq_constant_body *const iseq_body = ISEQ_BODY(iseq); @@ -2822,7 +3417,7 @@ iseq_data_to_ary(const rb_iseq_t *iseq) VALUE exception = rb_ary_new(); /* [[....]] */ VALUE misc = rb_hash_new(); - static ID insn_syms[VM_INSTRUCTION_SIZE/2]; /* w/o-trace only */ + static ID insn_syms[VM_BARE_INSTRUCTION_SIZE]; /* w/o-trace only */ struct st_table *labels_table = st_init_numtable(); VALUE labels_wrapper = TypedData_Wrap_Struct(0, &label_wrapper, labels_table); @@ -2840,7 +3435,7 @@ iseq_data_to_ary(const rb_iseq_t *iseq) for (i=0; i<iseq_body->local_table_size; i++) { ID lid = iseq_body->local_table[i]; if (lid) { - if (rb_id2str(lid)) { + if (lid != idItImplicit && rb_id2str(lid)) { rb_ary_push(locals, ID2SYM(lid)); } else { /* hidden variable from id_internal() */ @@ -2894,6 +3489,7 @@ iseq_data_to_ary(const rb_iseq_t *iseq) } if (iseq_body->param.flags.has_kwrest) rb_hash_aset(params, ID2SYM(rb_intern("kwrest")), INT2FIX(keyword->rest_start)); if (iseq_body->param.flags.ambiguous_param0) rb_hash_aset(params, ID2SYM(rb_intern("ambiguous_param0")), Qtrue); + if (iseq_body->param.flags.use_block) rb_hash_aset(params, ID2SYM(rb_intern("use_block")), Qtrue); } /* body */ @@ -2985,11 +3581,11 @@ iseq_data_to_ary(const rb_iseq_t *iseq) break; case TS_CDHASH: { - VALUE hash = *seq; + VALUE cdhash = *seq; VALUE val = rb_ary_new(); int i; - rb_hash_foreach(hash, cdhash_each, val); + st_foreach(rb_imemo_cdhash_tbl(cdhash), cdhash_each, val); for (i=0; i<RARRAY_LEN(val); i+=2) { VALUE pos = FIX2INT(rb_ary_entry(val, i+1)); @@ -3091,6 +3687,7 @@ iseq_data_to_ary(const rb_iseq_t *iseq) CHECK_EVENT(RUBY_EVENT_RETURN); CHECK_EVENT(RUBY_EVENT_B_CALL); CHECK_EVENT(RUBY_EVENT_B_RETURN); + CHECK_EVENT(RUBY_EVENT_RESCUE); #undef CHECK_EVENT prev_insn_info = info; } @@ -3114,6 +3711,7 @@ iseq_data_to_ary(const rb_iseq_t *iseq) #ifdef USE_ISEQ_NODE_ID rb_hash_aset(misc, ID2SYM(rb_intern("node_ids")), node_ids); #endif + rb_hash_aset(misc, ID2SYM(rb_intern("parser")), iseq_body->prism ? ID2SYM(rb_intern("prism")) : ID2SYM(rb_intern("parse.y"))); /* * [:magic, :major_version, :minor_version, :format_type, :misc, @@ -3134,6 +3732,9 @@ iseq_data_to_ary(const rb_iseq_t *iseq) rb_ary_push(val, params); rb_ary_push(val, exception); rb_ary_push(val, body); + + RB_GC_GUARD(iseq_value); + return val; } @@ -3147,19 +3748,28 @@ rb_iseq_parameters(const rb_iseq_t *iseq, int is_proc) ID req, opt, rest, block, key, keyrest; #define PARAM_TYPE(type) rb_ary_push(a = rb_ary_new2(2), ID2SYM(type)) #define PARAM_ID(i) body->local_table[(i)] -#define PARAM(i, type) ( \ - PARAM_TYPE(type), \ - rb_id2str(PARAM_ID(i)) ? \ - rb_ary_push(a, ID2SYM(PARAM_ID(i))) : \ +#define PARAM(i, type) ( \ + PARAM_TYPE(type), \ + PARAM_ID(i) != idItImplicit && rb_id2str(PARAM_ID(i)) ? \ + rb_ary_push(a, ID2SYM(PARAM_ID(i))) : \ a) CONST_ID(req, "req"); CONST_ID(opt, "opt"); + + if (body->param.flags.forwardable) { + // [[:rest, :*], [:keyrest, :**], [:block, :&]] + CONST_ID(rest, "rest"); + CONST_ID(keyrest, "keyrest"); + CONST_ID(block, "block"); + rb_ary_push(args, rb_ary_new_from_args(2, ID2SYM(rest), ID2SYM(idMULT))); + rb_ary_push(args, rb_ary_new_from_args(2, ID2SYM(keyrest), ID2SYM(idPow))); + rb_ary_push(args, rb_ary_new_from_args(2, ID2SYM(block), ID2SYM(idAnd))); + } + if (is_proc) { for (i = 0; i < body->param.lead_num; i++) { - PARAM_TYPE(opt); - rb_ary_push(a, rb_id2str(PARAM_ID(i)) ? ID2SYM(PARAM_ID(i)) : Qnil); - rb_ary_push(args, a); + rb_ary_push(args, PARAM(i, opt)); } } else { @@ -3169,11 +3779,7 @@ rb_iseq_parameters(const rb_iseq_t *iseq, int is_proc) } r = body->param.lead_num + body->param.opt_num; for (; i < r; i++) { - PARAM_TYPE(opt); - if (rb_id2str(PARAM_ID(i))) { - rb_ary_push(a, ID2SYM(PARAM_ID(i))); - } - rb_ary_push(args, a); + rb_ary_push(args, PARAM(i, opt)); } if (body->param.flags.has_rest) { CONST_ID(rest, "rest"); @@ -3182,9 +3788,7 @@ rb_iseq_parameters(const rb_iseq_t *iseq, int is_proc) r = body->param.post_start + body->param.post_num; if (is_proc) { for (i = body->param.post_start; i < r; i++) { - PARAM_TYPE(opt); - rb_ary_push(a, rb_id2str(PARAM_ID(i)) ? ID2SYM(PARAM_ID(i)) : Qnil); - rb_ary_push(args, a); + rb_ary_push(args, PARAM(i, opt)); } } else { @@ -3233,7 +3837,13 @@ rb_iseq_parameters(const rb_iseq_t *iseq, int is_proc) } rb_ary_push(args, a); } - if (body->param.flags.has_block) { + if (body->param.flags.accepts_no_block) { + ID noblock; + CONST_ID(noblock, "noblock"); + PARAM_TYPE(noblock); + rb_ary_push(args, a); + } + else if (body->param.flags.has_block) { CONST_ID(block, "block"); rb_ary_push(args, PARAM(body->param.block_start, block)); } @@ -3266,51 +3876,73 @@ rb_iseq_defined_string(enum defined_type type) return rb_fstring_cstr(estr); } -/* A map from encoded_insn to insn_data: decoded insn number, its len, - * non-trace version of encoded insn, and trace version. */ - +// A map from encoded_insn to insn_data: decoded insn number, its len, +// decoded ZJIT insn number, non-trace version of encoded insn, +// trace version, and zjit version. static st_table *encoded_insn_data; typedef struct insn_data_struct { int insn; int insn_len; void *notrace_encoded_insn; void *trace_encoded_insn; +#if USE_ZJIT + int zjit_insn; + void *zjit_encoded_insn; +#endif } insn_data_t; -static insn_data_t insn_data[VM_INSTRUCTION_SIZE/2]; +static insn_data_t insn_data[VM_BARE_INSTRUCTION_SIZE]; void +rb_free_encoded_insn_data(void) +{ + st_free_table(encoded_insn_data); +} + +// Initialize a table to decode bare, trace, and zjit instructions. +// This function also determines which instructions are used when TracePoint is enabled. +void rb_vm_encoded_insn_data_table_init(void) { #if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE const void * const *table = rb_vm_get_insns_address_table(); #define INSN_CODE(insn) ((VALUE)table[insn]) #else -#define INSN_CODE(insn) (insn) +#define INSN_CODE(insn) ((VALUE)(insn)) #endif - st_data_t insn; - encoded_insn_data = st_init_numtable_with_size(VM_INSTRUCTION_SIZE / 2); - - for (insn = 0; insn < VM_INSTRUCTION_SIZE/2; insn++) { - st_data_t key1 = (st_data_t)INSN_CODE(insn); - st_data_t key2 = (st_data_t)INSN_CODE(insn + VM_INSTRUCTION_SIZE/2); + encoded_insn_data = st_init_numtable_with_size(VM_BARE_INSTRUCTION_SIZE); - insn_data[insn].insn = (int)insn; + for (int insn = 0; insn < VM_BARE_INSTRUCTION_SIZE; insn++) { + insn_data[insn].insn = insn; insn_data[insn].insn_len = insn_len(insn); - if (insn != BIN(opt_invokebuiltin_delegate_leave)) { - insn_data[insn].notrace_encoded_insn = (void *) key1; - insn_data[insn].trace_encoded_insn = (void *) key2; - } - else { - insn_data[insn].notrace_encoded_insn = (void *) INSN_CODE(BIN(opt_invokebuiltin_delegate)); - insn_data[insn].trace_encoded_insn = (void *) INSN_CODE(BIN(opt_invokebuiltin_delegate) + VM_INSTRUCTION_SIZE/2); - } + // When tracing :return events, we convert opt_invokebuiltin_delegate_leave + leave into + // opt_invokebuiltin_delegate + trace_leave, presumably because we don't want to fire + // :return events before invokebuiltin. https://github.com/ruby/ruby/pull/3256 + int notrace_insn = (insn != BIN(opt_invokebuiltin_delegate_leave)) ? insn : BIN(opt_invokebuiltin_delegate); + insn_data[insn].notrace_encoded_insn = (void *)INSN_CODE(notrace_insn); + insn_data[insn].trace_encoded_insn = (void *)INSN_CODE(notrace_insn + VM_BARE_INSTRUCTION_SIZE); + st_data_t key1 = (st_data_t)INSN_CODE(insn); + st_data_t key2 = (st_data_t)INSN_CODE(insn + VM_BARE_INSTRUCTION_SIZE); st_add_direct(encoded_insn_data, key1, (st_data_t)&insn_data[insn]); st_add_direct(encoded_insn_data, key2, (st_data_t)&insn_data[insn]); + +#if USE_ZJIT + int zjit_insn = vm_bare_insn_to_zjit_insn(insn); + insn_data[insn].zjit_insn = zjit_insn; + insn_data[insn].zjit_encoded_insn = (insn != zjit_insn) ? (void *)INSN_CODE(zjit_insn) : 0; + + if (insn != zjit_insn) { + st_data_t key3 = (st_data_t)INSN_CODE(zjit_insn); + st_add_direct(encoded_insn_data, key3, (st_data_t)&insn_data[insn]); + } +#endif } } +// Decode an insn address to an insn. This returns bare instructions +// even if they're trace/zjit instructions. Use rb_vm_insn_addr2opcode +// to decode trace/zjit instructions as is. int rb_vm_insn_addr2insn(const void *addr) { @@ -3325,7 +3957,8 @@ rb_vm_insn_addr2insn(const void *addr) rb_bug("rb_vm_insn_addr2insn: invalid insn address: %p", addr); } -// Unlike rb_vm_insn_addr2insn, this function can return trace opcode variants. +// Decode an insn address to an insn. Unlike rb_vm_insn_addr2insn, +// this function can return trace/zjit opcode variants. int rb_vm_insn_addr2opcode(const void *addr) { @@ -3336,15 +3969,22 @@ rb_vm_insn_addr2opcode(const void *addr) insn_data_t *e = (insn_data_t *)val; int opcode = e->insn; if (addr == e->trace_encoded_insn) { - opcode += VM_INSTRUCTION_SIZE/2; + opcode += VM_BARE_INSTRUCTION_SIZE; + } +#if USE_ZJIT + else if (addr == e->zjit_encoded_insn) { + opcode = e->zjit_insn; } +#endif return opcode; } rb_bug("rb_vm_insn_addr2opcode: invalid insn address: %p", addr); } -// Decode `ISEQ_BODY(iseq)->iseq_encoded[i]` to an insn. +// Decode `ISEQ_BODY(iseq)->iseq_encoded[i]` to an insn. This returns +// bare instructions even if they're trace/zjit instructions. Use +// rb_vm_insn_addr2opcode to decode trace/zjit instructions as is. int rb_vm_insn_decode(const VALUE encoded) { @@ -3356,15 +3996,16 @@ rb_vm_insn_decode(const VALUE encoded) return insn; } +// Turn on or off tracing for a given instruction address static inline int -encoded_iseq_trace_instrument(VALUE *iseq_encoded_insn, rb_event_flag_t turnon, bool remain_current_trace) +encoded_iseq_trace_instrument(VALUE *iseq_encoded_insn, rb_event_flag_t turnon, bool remain_traced) { st_data_t key = (st_data_t)*iseq_encoded_insn; st_data_t val; if (st_lookup(encoded_insn_data, key, &val)) { insn_data_t *e = (insn_data_t *)val; - if (remain_current_trace && key == (st_data_t)e->trace_encoded_insn) { + if (remain_traced && key == (st_data_t)e->trace_encoded_insn) { turnon = 1; } *iseq_encoded_insn = (VALUE) (turnon ? e->trace_encoded_insn : e->notrace_encoded_insn); @@ -3374,7 +4015,8 @@ encoded_iseq_trace_instrument(VALUE *iseq_encoded_insn, rb_event_flag_t turnon, rb_bug("trace_instrument: invalid insn address: %p", (void *)*iseq_encoded_insn); } -void +// Turn off tracing for an instruction at pos after tracing event flags are cleared +static void rb_iseq_trace_flag_cleared(const rb_iseq_t *iseq, size_t pos) { const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); @@ -3400,14 +4042,16 @@ add_bmethod_events(rb_event_flag_t events) // Note, to support call/return events for bmethods, turnon_event can have more events than tpval. static int -iseq_add_local_tracepoint(const rb_iseq_t *iseq, rb_event_flag_t turnon_events, VALUE tpval, unsigned int target_line) +iseq_add_local_tracepoint(const rb_iseq_t *iseq, rb_event_flag_t turnon_events, VALUE tpval, unsigned int target_line, rb_ractor_t *r) { unsigned int pc; int n = 0; const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); VALUE *iseq_encoded = (VALUE *)body->iseq_encoded; + rb_iseq_t *iseq_mut = (rb_iseq_t*)iseq; VM_ASSERT(ISEQ_EXECUTABLE_P(iseq)); + ASSERT_vm_locking_with_barrier(); for (pc=0; pc<body->iseq_size;) { const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pc); @@ -3429,11 +4073,9 @@ iseq_add_local_tracepoint(const rb_iseq_t *iseq, rb_event_flag_t turnon_events, } if (n > 0) { - if (iseq->aux.exec.local_hooks == NULL) { - ((rb_iseq_t *)iseq)->aux.exec.local_hooks = RB_ZALLOC(rb_hook_list_t); - iseq->aux.exec.local_hooks->is_local = true; - } - rb_hook_list_connect_tracepoint((VALUE)iseq, iseq->aux.exec.local_hooks, tpval, target_line); + rb_hook_list_t *hook_list = rb_iseq_local_hooks(iseq, r, true); + rb_hook_list_connect_local_tracepoint(hook_list, tpval, target_line); + iseq_mut->aux.exec.local_hooks_cnt++; } return n; @@ -3444,19 +4086,21 @@ struct trace_set_local_events_struct { VALUE tpval; unsigned int target_line; int n; + rb_ractor_t *r; }; static void iseq_add_local_tracepoint_i(const rb_iseq_t *iseq, void *p) { struct trace_set_local_events_struct *data = (struct trace_set_local_events_struct *)p; - data->n += iseq_add_local_tracepoint(iseq, data->turnon_events, data->tpval, data->target_line); + data->n += iseq_add_local_tracepoint(iseq, data->turnon_events, data->tpval, data->target_line, data->r); iseq_iterate_children(iseq, iseq_add_local_tracepoint_i, p); } int rb_iseq_add_local_tracepoint_recursively(const rb_iseq_t *iseq, rb_event_flag_t turnon_events, VALUE tpval, unsigned int target_line, bool target_bmethod) { + ASSERT_vm_locking_with_barrier(); struct trace_set_local_events_struct data; if (target_bmethod) { turnon_events = add_bmethod_events(turnon_events); @@ -3465,35 +4109,52 @@ rb_iseq_add_local_tracepoint_recursively(const rb_iseq_t *iseq, rb_event_flag_t data.tpval = tpval; data.target_line = target_line; data.n = 0; + data.r = GET_RACTOR(); iseq_add_local_tracepoint_i(iseq, (void *)&data); - if (0) rb_funcall(Qnil, rb_intern("puts"), 1, rb_iseq_disasm(iseq)); /* for debug */ + if (0) fprintf(stderr, "Iseq disasm:\n:%s", RSTRING_PTR(rb_iseq_disasm(iseq))); /* for debug */ return data.n; } static int -iseq_remove_local_tracepoint(const rb_iseq_t *iseq, VALUE tpval) +iseq_remove_local_tracepoint(const rb_iseq_t *iseq, VALUE tpval, rb_ractor_t *r) { int n = 0; + unsigned int num_hooks_left; + unsigned int pc; + const struct rb_iseq_constant_body *body; + rb_iseq_t *iseq_mut = (rb_iseq_t*)iseq; + rb_hook_list_t *hook_list; + VALUE *iseq_encoded; + ASSERT_vm_locking_with_barrier(); - if (iseq->aux.exec.local_hooks) { - unsigned int pc; - const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); - VALUE *iseq_encoded = (VALUE *)body->iseq_encoded; + hook_list = rb_iseq_local_hooks(iseq, r, false); + + if (hook_list) { rb_event_flag_t local_events = 0; - rb_hook_list_remove_tracepoint(iseq->aux.exec.local_hooks, tpval); - local_events = iseq->aux.exec.local_hooks->events; + rb_event_flag_t prev_events = hook_list->events; + if (rb_hook_list_remove_local_tracepoint(hook_list, tpval)) { + RUBY_ASSERT(iseq->aux.exec.local_hooks_cnt > 0); + iseq_mut->aux.exec.local_hooks_cnt--; + local_events = hook_list->events; // remaining events for this ractor + num_hooks_left = rb_hook_list_count(hook_list); + if (local_events == 0 && prev_events != 0) { + st_delete(rb_ractor_targeted_hooks(r), (st_data_t*)&iseq, NULL); + rb_hook_list_free(hook_list); + } - if (local_events == 0) { - rb_hook_list_free(iseq->aux.exec.local_hooks); - ((rb_iseq_t *)iseq)->aux.exec.local_hooks = NULL; - } + if (iseq->aux.exec.local_hooks_cnt == num_hooks_left) { + body = ISEQ_BODY(iseq); + iseq_encoded = (VALUE *)body->iseq_encoded; + local_events = add_bmethod_events(local_events); + for (pc = 0; pc<body->iseq_size;) { + rb_event_flag_t pc_events = rb_iseq_event_flags(iseq, pc); + pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & (local_events | iseq->aux.exec.global_trace_events), false); + } + } - local_events = add_bmethod_events(local_events); - for (pc = 0; pc<body->iseq_size;) { - rb_event_flag_t pc_events = rb_iseq_event_flags(iseq, pc); - pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & (local_events | iseq->aux.exec.global_trace_events), false); + n++; } } return n; @@ -3502,22 +4163,25 @@ iseq_remove_local_tracepoint(const rb_iseq_t *iseq, VALUE tpval) struct trace_clear_local_events_struct { VALUE tpval; int n; + rb_ractor_t *r; }; static void iseq_remove_local_tracepoint_i(const rb_iseq_t *iseq, void *p) { struct trace_clear_local_events_struct *data = (struct trace_clear_local_events_struct *)p; - data->n += iseq_remove_local_tracepoint(iseq, data->tpval); + data->n += iseq_remove_local_tracepoint(iseq, data->tpval, data->r); iseq_iterate_children(iseq, iseq_remove_local_tracepoint_i, p); } int -rb_iseq_remove_local_tracepoint_recursively(const rb_iseq_t *iseq, VALUE tpval) +rb_iseq_remove_local_tracepoint_recursively(const rb_iseq_t *iseq, VALUE tpval, rb_ractor_t *r) { struct trace_clear_local_events_struct data; + ASSERT_vm_locking_with_barrier(); data.tpval = tpval; data.n = 0; + data.r = r; iseq_remove_local_tracepoint_i(iseq, (void *)&data); return data.n; @@ -3535,11 +4199,14 @@ rb_iseq_trace_set(const rb_iseq_t *iseq, rb_event_flag_t turnon_events) return; } else { + // NOTE: this does not need VM barrier if it's a new ISEQ unsigned int pc; const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); + VALUE *iseq_encoded = (VALUE *)body->iseq_encoded; rb_event_flag_t enabled_events; - rb_event_flag_t local_events = iseq->aux.exec.local_hooks ? iseq->aux.exec.local_hooks->events : 0; + rb_hook_list_t *local_hooks = rb_iseq_local_hooks(iseq, GET_RACTOR(), false); + rb_event_flag_t local_events = local_hooks ? local_hooks->events : 0; ((rb_iseq_t *)iseq)->aux.exec.global_trace_events = turnon_events; enabled_events = add_bmethod_events(turnon_events | local_events); @@ -3550,30 +4217,74 @@ rb_iseq_trace_set(const rb_iseq_t *iseq, rb_event_flag_t turnon_events) } } -bool rb_vm_call_ivar_attrset_p(const vm_call_handler ch); void rb_vm_cc_general(const struct rb_callcache *cc); +static bool +clear_attr_cc(VALUE v) +{ + ASSERT_vm_locking_with_barrier(); + if (imemo_type_p(v, imemo_callcache) && vm_cc_ivar_p((const struct rb_callcache *)v)) { + rb_vm_cc_general((struct rb_callcache *)v); + return true; + } + else { + return false; + } +} + +static bool +clear_bf_cc(VALUE v) +{ + ASSERT_vm_locking_with_barrier(); + if (imemo_type_p(v, imemo_callcache) && vm_cc_bf_p((const struct rb_callcache *)v)) { + rb_vm_cc_general((struct rb_callcache *)v); + return true; + } + else { + return false; + } +} + static int clear_attr_ccs_i(void *vstart, void *vend, size_t stride, void *data) { VALUE v = (VALUE)vstart; for (; v != (VALUE)vend; v += stride) { - void *ptr = asan_poisoned_object_p(v); - asan_unpoison_object(v, false); + void *ptr = rb_asan_poisoned_object_p(v); + rb_asan_unpoison_object(v, false); + clear_attr_cc(v); + asan_poison_object_if(ptr, v); + } + return 0; +} - if (imemo_type_p(v, imemo_callcache) && rb_vm_call_ivar_attrset_p(((const struct rb_callcache *)v)->call_)) { - rb_vm_cc_general((struct rb_callcache *)v); - } +void +rb_clear_attr_ccs(void) +{ + RB_VM_LOCKING() { + rb_vm_barrier(); + rb_objspace_each_objects(clear_attr_ccs_i, NULL); + } +} +static int +clear_bf_ccs_i(void *vstart, void *vend, size_t stride, void *data) +{ + VALUE v = (VALUE)vstart; + for (; v != (VALUE)vend; v += stride) { + void *ptr = rb_asan_poisoned_object_p(v); + rb_asan_unpoison_object(v, false); + clear_bf_cc(v); asan_poison_object_if(ptr, v); } return 0; } void -rb_clear_attr_ccs(void) +rb_clear_bf_ccs(void) { - rb_objspace_each_objects(clear_attr_ccs_i, NULL); + ASSERT_vm_locking_with_barrier(); + rb_objspace_each_objects(clear_bf_ccs_i, NULL); } static int @@ -3583,14 +4294,15 @@ trace_set_i(void *vstart, void *vend, size_t stride, void *data) VALUE v = (VALUE)vstart; for (; v != (VALUE)vend; v += stride) { - void *ptr = asan_poisoned_object_p(v); - asan_unpoison_object(v, false); + void *ptr = rb_asan_poisoned_object_p(v); + rb_asan_unpoison_object(v, false); if (rb_obj_is_iseq(v)) { rb_iseq_trace_set(rb_iseq_check((rb_iseq_t *)v), turnon_events); } - else if (imemo_type_p(v, imemo_callcache) && rb_vm_call_ivar_attrset_p(((const struct rb_callcache *)v)->call_)) { - rb_vm_cc_general((struct rb_callcache *)v); + else if (clear_attr_cc(v)) { + } + else if (clear_bf_cc(v)) { } asan_poison_object_if(ptr, v); @@ -3601,7 +4313,10 @@ trace_set_i(void *vstart, void *vend, size_t stride, void *data) void rb_iseq_trace_set_all(rb_event_flag_t turnon_events) { - rb_objspace_each_objects(trace_set_i, &turnon_events); + RB_VM_LOCKING() { + rb_vm_barrier(); + rb_objspace_each_objects(trace_set_i, &turnon_events); + } } VALUE @@ -3878,6 +4593,9 @@ Init_ISeq(void) (void)iseq_s_load; rb_define_singleton_method(rb_cISeq, "compile", iseqw_s_compile, -1); + rb_define_singleton_method(rb_cISeq, "compile_parsey", iseqw_s_compile_parsey, -1); + rb_define_singleton_method(rb_cISeq, "compile_prism", iseqw_s_compile_prism, -1); + rb_define_singleton_method(rb_cISeq, "compile_file_prism", iseqw_s_compile_file_prism, -1); rb_define_singleton_method(rb_cISeq, "new", iseqw_s_compile, -1); rb_define_singleton_method(rb_cISeq, "compile_file", iseqw_s_compile_file, -1); rb_define_singleton_method(rb_cISeq, "compile_option", iseqw_s_compile_option_get, 0); |
