diff options
Diffstat (limited to 'yjit.c')
-rw-r--r-- | yjit.c | 659 |
1 files changed, 524 insertions, 135 deletions
@@ -13,7 +13,9 @@ #include "internal/variable.h" #include "internal/compile.h" #include "internal/class.h" -#include "gc.h" +#include "internal/fixnum.h" +#include "internal/numeric.h" +#include "internal/gc.h" #include "vm_core.h" #include "vm_callinfo.h" #include "builtin.h" @@ -25,6 +27,8 @@ #include "probes.h" #include "probes_helper.h" #include "iseq.h" +#include "ruby/debug.h" +#include "internal/cont.h" // For mmapp(), sysconf() #ifndef _WIN32 @@ -34,6 +38,18 @@ #include <errno.h> +// Field offsets for the RObject struct +enum robject_offsets { + ROBJECT_OFFSET_AS_HEAP_IVPTR = offsetof(struct RObject, as.heap.ivptr), + ROBJECT_OFFSET_AS_HEAP_IV_INDEX_TBL = offsetof(struct RObject, as.heap.iv_index_tbl), + ROBJECT_OFFSET_AS_ARY = offsetof(struct RObject, as.ary), +}; + +// Field offsets for the RString struct +enum rstring_offsets { + RUBY_OFFSET_RSTRING_LEN = offsetof(struct RString, len) +}; + // We need size_t to have a known size to simplify code generation and FFI. // TODO(alan): check this in configure.ac to fail fast on 32 bit platforms. STATIC_ASSERT(64b_size_t, SIZE_MAX == UINT64_MAX); @@ -55,34 +71,159 @@ STATIC_ASSERT(pointer_tagging_scheme, USE_FLONUM); // types in C such as int, long, etc. and use `std::os::raw::c_long` and friends on // the Rust side. // -// What's up with the long prefix? The "rb_" part is to apease `make leaked-globals` -// which runs on upstream CI. The rationale for the check is unclear to Alan as -// we build with `-fvisibility=hidden` so only explicitly marked functions end -// up as public symbols in libruby.so. Perhaps the check is for the static -// libruby and or general namspacing hygiene? Alan admits his bias towards ELF -// platforms and newer compilers. -// +// What's up with the long prefix? Even though we build with `-fvisibility=hidden` +// we are sometimes a static library where the option doesn't prevent name collision. // The "_yjit_" part is for trying to be informative. We might want different // suffixes for symbols meant for Rust and symbols meant for broader CRuby. -void +bool rb_yjit_mark_writable(void *mem_block, uint32_t mem_size) { - if (mprotect(mem_block, mem_size, PROT_READ | PROT_WRITE)) { - rb_bug("Couldn't make JIT page region (%p, %lu bytes) writeable, errno: %s\n", - mem_block, (unsigned long)mem_size, strerror(errno)); - } + return mprotect(mem_block, mem_size, PROT_READ | PROT_WRITE) == 0; } void rb_yjit_mark_executable(void *mem_block, uint32_t mem_size) { + // Do not call mprotect when mem_size is zero. Some platforms may return + // an error for it. https://github.com/Shopify/ruby/issues/450 + if (mem_size == 0) { + return; + } if (mprotect(mem_block, mem_size, PROT_READ | PROT_EXEC)) { - rb_bug("Couldn't make JIT page (%p, %lu bytes) executable, errno: %s\n", + rb_bug("Couldn't make JIT page (%p, %lu bytes) executable, errno: %s", mem_block, (unsigned long)mem_size, strerror(errno)); } } +// Free the specified memory block. +bool +rb_yjit_mark_unused(void *mem_block, uint32_t mem_size) +{ + // On Linux, you need to use madvise MADV_DONTNEED to free memory. + // We might not need to call this on macOS, but it's not really documented. + // We generally prefer to do the same thing on both to ease testing too. + madvise(mem_block, mem_size, MADV_DONTNEED); + + // On macOS, mprotect PROT_NONE seems to reduce RSS. + // We also call this on Linux to avoid executing unused pages. + return mprotect(mem_block, mem_size, PROT_NONE) == 0; +} + +long +rb_yjit_array_len(VALUE a) +{ + return rb_array_len(a); +} + +// `start` is inclusive and `end` is exclusive. +void +rb_yjit_icache_invalidate(void *start, void *end) +{ + // Clear/invalidate the instruction cache. Compiles to nothing on x86_64 + // but required on ARM before running freshly written code. + // On Darwin it's the same as calling sys_icache_invalidate(). +#ifdef __GNUC__ + __builtin___clear_cache(start, end); +#elif defined(__aarch64__) +#error No instruction cache clear available with this compiler on Aarch64! +#endif +} + +# define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x))) + +// For a given raw_sample (frame), set the hash with the caller's +// name, file, and line number. Return the hash with collected frame_info. +static void +rb_yjit_add_frame(VALUE hash, VALUE frame) +{ + VALUE frame_id = PTR2NUM(frame); + + if (RTEST(rb_hash_aref(hash, frame_id))) { + return; + } + else { + VALUE frame_info = rb_hash_new(); + // Full label for the frame + VALUE name = rb_profile_frame_full_label(frame); + // Absolute path of the frame from rb_iseq_realpath + VALUE file = rb_profile_frame_absolute_path(frame); + // Line number of the frame + VALUE line = rb_profile_frame_first_lineno(frame); + + // If absolute path isn't available use the rb_iseq_path + if (NIL_P(file)) { + file = rb_profile_frame_path(frame); + } + + rb_hash_aset(frame_info, ID2SYM(rb_intern("name")), name); + rb_hash_aset(frame_info, ID2SYM(rb_intern("file")), file); + rb_hash_aset(frame_info, ID2SYM(rb_intern("samples")), INT2NUM(0)); + rb_hash_aset(frame_info, ID2SYM(rb_intern("total_samples")), INT2NUM(0)); + rb_hash_aset(frame_info, ID2SYM(rb_intern("edges")), rb_hash_new()); + rb_hash_aset(frame_info, ID2SYM(rb_intern("lines")), rb_hash_new()); + + if (line != INT2FIX(0)) { + rb_hash_aset(frame_info, ID2SYM(rb_intern("line")), line); + } + + rb_hash_aset(hash, frame_id, frame_info); + } +} + +// Parses the YjitExitLocations raw_samples and line_samples collected by +// rb_yjit_record_exit_stack and turns them into 3 hashes (raw, lines, and frames) to +// be used by RubyVM::YJIT.exit_locations. yjit_raw_samples represents the raw frames information +// (without name, file, and line), and yjit_line_samples represents the line information +// of the iseq caller. +VALUE +rb_yjit_exit_locations_dict(VALUE *yjit_raw_samples, int *yjit_line_samples, int samples_len) +{ + VALUE result = rb_hash_new(); + VALUE raw_samples = rb_ary_new_capa(samples_len); + VALUE line_samples = rb_ary_new_capa(samples_len); + VALUE frames = rb_hash_new(); + int idx = 0; + + // While the index is less than samples_len, parse yjit_raw_samples and + // yjit_line_samples, then add casted values to raw_samples and line_samples array. + while (idx < samples_len) { + int num = (int)yjit_raw_samples[idx]; + int line_num = (int)yjit_line_samples[idx]; + idx++; + + // + 1 as we append an additional sample for the insn + rb_ary_push(raw_samples, SIZET2NUM(num + 1)); + rb_ary_push(line_samples, INT2NUM(line_num + 1)); + + // Loop through the length of samples_len and add data to the + // frames hash. Also push the current value onto the raw_samples + // and line_samples array respectively. + for (int o = 0; o < num; o++) { + rb_yjit_add_frame(frames, yjit_raw_samples[idx]); + rb_ary_push(raw_samples, SIZET2NUM(yjit_raw_samples[idx])); + rb_ary_push(line_samples, INT2NUM(yjit_line_samples[idx])); + idx++; + } + + rb_ary_push(raw_samples, SIZET2NUM(yjit_raw_samples[idx])); + rb_ary_push(line_samples, INT2NUM(yjit_line_samples[idx])); + idx++; + + rb_ary_push(raw_samples, SIZET2NUM(yjit_raw_samples[idx])); + rb_ary_push(line_samples, INT2NUM(yjit_line_samples[idx])); + idx++; + } + + // Set add the raw_samples, line_samples, and frames to the results + // hash. + rb_hash_aset(result, ID2SYM(rb_intern("raw")), raw_samples); + rb_hash_aset(result, ID2SYM(rb_intern("lines")), line_samples); + rb_hash_aset(result, ID2SYM(rb_intern("frames")), frames); + + return result; +} + uint32_t rb_yjit_get_page_size(void) { @@ -120,25 +261,29 @@ align_ptr(uint8_t *ptr, uint32_t multiple) } #endif -// Allocate a block of executable memory +// Address space reservation. Memory pages are mapped on an as needed basis. +// See the Rust mm module for details. uint8_t * -rb_yjit_alloc_exec_mem(uint32_t mem_size) +rb_yjit_reserve_addr_space(uint32_t mem_size) { #ifndef _WIN32 uint8_t *mem_block; // On Linux #if defined(MAP_FIXED_NOREPLACE) && defined(_SC_PAGESIZE) + uint32_t const page_size = (uint32_t)sysconf(_SC_PAGESIZE); + uint8_t *const cfunc_sample_addr = (void *)&rb_yjit_reserve_addr_space; + uint8_t *const probe_region_end = cfunc_sample_addr + INT32_MAX; // Align the requested address to page size - uint32_t page_size = (uint32_t)sysconf(_SC_PAGESIZE); - uint8_t *req_addr = align_ptr((uint8_t*)&rb_yjit_alloc_exec_mem, page_size); + uint8_t *req_addr = align_ptr(cfunc_sample_addr, page_size); + // Probe for addresses close to this function using MAP_FIXED_NOREPLACE + // to improve odds of being in range for 32-bit relative call instructions. do { - // Try to map a chunk of memory as executable - mem_block = (uint8_t*)mmap( - (void*)req_addr, + mem_block = mmap( + req_addr, mem_size, - PROT_READ | PROT_EXEC, + PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0 @@ -151,15 +296,15 @@ rb_yjit_alloc_exec_mem(uint32_t mem_size) // +4MB req_addr += 4 * 1024 * 1024; - } while (req_addr < (uint8_t*)&rb_yjit_alloc_exec_mem + INT32_MAX); + } while (req_addr < probe_region_end); // On MacOS and other platforms #else // Try to map a chunk of memory as executable - mem_block = (uint8_t*)mmap( - (void*)rb_yjit_alloc_exec_mem, + mem_block = mmap( + (void *)rb_yjit_reserve_addr_space, mem_size, - PROT_READ | PROT_EXEC, + PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0 @@ -169,10 +314,10 @@ rb_yjit_alloc_exec_mem(uint32_t mem_size) // Fallback if (mem_block == MAP_FAILED) { // Try again without the address hint (e.g., valgrind) - mem_block = (uint8_t*)mmap( + mem_block = mmap( NULL, mem_size, - PROT_READ | PROT_EXEC, + PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0 @@ -181,17 +326,14 @@ rb_yjit_alloc_exec_mem(uint32_t mem_size) // Check that the memory mapping was successful if (mem_block == MAP_FAILED) { - perror("mmap call failed"); - exit(-1); + perror("ruby: yjit: mmap:"); + if(errno == ENOMEM) { + // No crash report if it's only insufficient memory + exit(EXIT_FAILURE); + } + rb_bug("mmap failed"); } - // Fill the executable memory with PUSH DS (0x1E) so that - // executing uninitialized memory will fault with #UD in - // 64-bit mode. - rb_yjit_mark_writable(mem_block, mem_size); - memset(mem_block, 0x1E, mem_size); - rb_yjit_mark_executable(mem_block, mem_size); - return mem_block; #else // Windows not supported for now @@ -201,7 +343,7 @@ rb_yjit_alloc_exec_mem(uint32_t mem_size) // Is anyone listening for :c_call and :c_return event currently? bool -rb_c_method_tracing_currently_enabled(rb_execution_context_t *ec) +rb_c_method_tracing_currently_enabled(const rb_execution_context_t *ec) { rb_event_flag_t tracing_events; if (rb_multi_ractor_p()) { @@ -280,7 +422,12 @@ void rb_iseq_reset_jit_func(const rb_iseq_t *iseq) { RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(iseq, imemo_iseq)); - iseq->body->jit_func = NULL; + iseq->body->jit_entry = NULL; + iseq->body->jit_exception = NULL; + // Enable re-compiling this ISEQ. Event when it's invalidated for TracePoint, + // we'd like to re-compile ISEQs that haven't been converted to trace_* insns. + iseq->body->jit_entry_calls = 0; + iseq->body->jit_exception_calls = 0; } // Get the PC for a given index in an iseq @@ -305,11 +452,24 @@ rb_iseq_opcode_at_pc(const rb_iseq_t *iseq, const VALUE *pc) return rb_vm_insn_addr2opcode((const void *)at_pc); } -// used by jit_rb_str_bytesize in codegen.rs -VALUE -rb_str_bytesize(VALUE str) +unsigned long +rb_RSTRING_LEN(VALUE str) +{ + return RSTRING_LEN(str); +} + +char * +rb_RSTRING_PTR(VALUE str) +{ + return RSTRING_PTR(str); +} + +rb_proc_t * +rb_yjit_get_proc_ptr(VALUE procv) { - return LONG2NUM(RSTRING_LEN(str)); + rb_proc_t *proc; + GetProcPtr(procv, proc); + return proc; } // This is defined only as a named struct inside rb_iseq_constant_body. @@ -323,13 +483,6 @@ rb_insn_name(VALUE insn) return insn_name(insn); } -// Query the instruction length in bytes for YARV opcode insn -int -rb_insn_len(VALUE insn) -{ - return insn_len(insn); -} - unsigned int rb_vm_ci_argc(const struct rb_callinfo *ci) { @@ -367,61 +520,68 @@ rb_get_cikw_keywords_idx(const struct rb_callinfo_kwarg *cikw, int idx) } rb_method_visibility_t -rb_METHOD_ENTRY_VISI(rb_callable_method_entry_t *me) +rb_METHOD_ENTRY_VISI(const rb_callable_method_entry_t *me) { return METHOD_ENTRY_VISI(me); } rb_method_type_t -rb_get_cme_def_type(rb_callable_method_entry_t *cme) +rb_get_cme_def_type(const rb_callable_method_entry_t *cme) { - return cme->def->type; + if (UNDEFINED_METHOD_ENTRY_P(cme)) { + return VM_METHOD_TYPE_UNDEF; + } + else { + return cme->def->type; + } } ID -rb_get_cme_def_body_attr_id(rb_callable_method_entry_t *cme) +rb_get_cme_def_body_attr_id(const rb_callable_method_entry_t *cme) { return cme->def->body.attr.id; } +ID rb_get_symbol_id(VALUE namep); + enum method_optimized_type -rb_get_cme_def_body_optimized_type(rb_callable_method_entry_t *cme) +rb_get_cme_def_body_optimized_type(const rb_callable_method_entry_t *cme) { return cme->def->body.optimized.type; } unsigned int -rb_get_cme_def_body_optimized_index(rb_callable_method_entry_t *cme) +rb_get_cme_def_body_optimized_index(const rb_callable_method_entry_t *cme) { return cme->def->body.optimized.index; } rb_method_cfunc_t * -rb_get_cme_def_body_cfunc(rb_callable_method_entry_t *cme) +rb_get_cme_def_body_cfunc(const rb_callable_method_entry_t *cme) { return UNALIGNED_MEMBER_PTR(cme->def, body.cfunc); } uintptr_t -rb_get_def_method_serial(rb_method_definition_t *def) +rb_get_def_method_serial(const rb_method_definition_t *def) { return def->method_serial; } ID -rb_get_def_original_id(rb_method_definition_t *def) +rb_get_def_original_id(const rb_method_definition_t *def) { return def->original_id; } int -rb_get_mct_argc(rb_method_cfunc_t *mct) +rb_get_mct_argc(const rb_method_cfunc_t *mct) { return mct->argc; } void * -rb_get_mct_func(rb_method_cfunc_t *mct) +rb_get_mct_func(const rb_method_cfunc_t *mct) { return (void*)mct->func; // this field is defined as type VALUE (*func)(ANYARGS) } @@ -432,129 +592,182 @@ rb_get_def_iseq_ptr(rb_method_definition_t *def) return def_iseq_ptr(def); } -rb_iseq_t * -rb_get_iseq_body_local_iseq(rb_iseq_t *iseq) +VALUE +rb_get_def_bmethod_proc(rb_method_definition_t *def) +{ + RUBY_ASSERT(def->type == VM_METHOD_TYPE_BMETHOD); + return def->body.bmethod.proc; +} + +const rb_iseq_t * +rb_get_iseq_body_local_iseq(const rb_iseq_t *iseq) { return iseq->body->local_iseq; } +const rb_iseq_t * +rb_get_iseq_body_parent_iseq(const rb_iseq_t *iseq) +{ + return iseq->body->parent_iseq; +} + unsigned int -rb_get_iseq_body_local_table_size(rb_iseq_t *iseq) +rb_get_iseq_body_local_table_size(const rb_iseq_t *iseq) { return iseq->body->local_table_size; } VALUE * -rb_get_iseq_body_iseq_encoded(rb_iseq_t *iseq) +rb_get_iseq_body_iseq_encoded(const rb_iseq_t *iseq) { return iseq->body->iseq_encoded; } -bool -rb_get_iseq_body_builtin_inline_p(rb_iseq_t *iseq) +unsigned +rb_get_iseq_body_stack_max(const rb_iseq_t *iseq) { - return iseq->body->builtin_inline_p; + return iseq->body->stack_max; } -unsigned -rb_get_iseq_body_stack_max(rb_iseq_t *iseq) +enum rb_iseq_type +rb_get_iseq_body_type(const rb_iseq_t *iseq) { - return iseq->body->stack_max; + return iseq->body->type; +} + +bool +rb_get_iseq_flags_has_lead(const rb_iseq_t *iseq) +{ + return iseq->body->param.flags.has_lead; } bool -rb_get_iseq_flags_has_opt(rb_iseq_t *iseq) +rb_get_iseq_flags_has_opt(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_opt; } bool -rb_get_iseq_flags_has_kw(rb_iseq_t *iseq) +rb_get_iseq_flags_has_kw(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_kw; } bool -rb_get_iseq_flags_has_post(rb_iseq_t *iseq) +rb_get_iseq_flags_has_post(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_post; } bool -rb_get_iseq_flags_has_kwrest(rb_iseq_t *iseq) +rb_get_iseq_flags_has_kwrest(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_kwrest; } bool -rb_get_iseq_flags_has_rest(rb_iseq_t *iseq) +rb_get_iseq_flags_anon_kwrest(const rb_iseq_t *iseq) +{ + return iseq->body->param.flags.anon_kwrest; +} + +bool +rb_get_iseq_flags_has_rest(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_rest; } bool -rb_get_iseq_flags_has_block(rb_iseq_t *iseq) +rb_get_iseq_flags_ruby2_keywords(const rb_iseq_t *iseq) +{ + return iseq->body->param.flags.ruby2_keywords; +} + +bool +rb_get_iseq_flags_has_block(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_block; } bool -rb_get_iseq_flags_has_accepts_no_kwarg(rb_iseq_t *iseq) +rb_get_iseq_flags_ambiguous_param0(const rb_iseq_t *iseq) +{ + return iseq->body->param.flags.ambiguous_param0; +} + +bool +rb_get_iseq_flags_accepts_no_kwarg(const rb_iseq_t *iseq) { return iseq->body->param.flags.accepts_no_kwarg; } const rb_seq_param_keyword_struct * -rb_get_iseq_body_param_keyword(rb_iseq_t *iseq) +rb_get_iseq_body_param_keyword(const rb_iseq_t *iseq) { return iseq->body->param.keyword; } unsigned -rb_get_iseq_body_param_size(rb_iseq_t *iseq) +rb_get_iseq_body_param_size(const rb_iseq_t *iseq) { return iseq->body->param.size; } int -rb_get_iseq_body_param_lead_num(rb_iseq_t *iseq) +rb_get_iseq_body_param_lead_num(const rb_iseq_t *iseq) { return iseq->body->param.lead_num; } int -rb_get_iseq_body_param_opt_num(rb_iseq_t *iseq) +rb_get_iseq_body_param_opt_num(const rb_iseq_t *iseq) { return iseq->body->param.opt_num; } const VALUE * -rb_get_iseq_body_param_opt_table(rb_iseq_t *iseq) +rb_get_iseq_body_param_opt_table(const rb_iseq_t *iseq) { return iseq->body->param.opt_table; } -// If true, the iseq is leaf and it can be replaced by a single C call. -bool -rb_leaf_invokebuiltin_iseq_p(const rb_iseq_t *iseq) +VALUE +rb_optimized_call(VALUE *recv, rb_execution_context_t *ec, int argc, VALUE *argv, int kw_splat, VALUE block_handler) +{ + rb_proc_t *proc; + GetProcPtr(recv, proc); + return rb_vm_invoke_proc(ec, proc, argc, argv, kw_splat, block_handler); +} + +unsigned int +rb_yjit_iseq_builtin_attrs(const rb_iseq_t *iseq) { - unsigned int invokebuiltin_len = insn_len(BIN(opt_invokebuiltin_delegate_leave)); - unsigned int leave_len = insn_len(BIN(leave)); + return iseq->body->builtin_attrs; +} - return (iseq->body->iseq_size == (invokebuiltin_len + leave_len) && - rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[0]) == BIN(opt_invokebuiltin_delegate_leave) && - rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[invokebuiltin_len]) == BIN(leave) && - iseq->body->builtin_inline_p - ); +// If true, the iseq has only opt_invokebuiltin_delegate(_leave) and leave insns. +static bool +invokebuiltin_delegate_leave_p(const rb_iseq_t *iseq) +{ + int insn1 = rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[0]); + if ((int)iseq->body->iseq_size != insn_len(insn1) + insn_len(BIN(leave))) { + return false; + } + int insn2 = rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[insn_len(insn1)]); + return (insn1 == BIN(opt_invokebuiltin_delegate) || insn1 == BIN(opt_invokebuiltin_delegate_leave)) && + insn2 == BIN(leave); } -// Return an rb_builtin_function if the iseq contains only that leaf builtin function. +// Return an rb_builtin_function if the iseq contains only that builtin function. const struct rb_builtin_function * -rb_leaf_builtin_function(const rb_iseq_t *iseq) +rb_yjit_builtin_function(const rb_iseq_t *iseq) { - if (!rb_leaf_invokebuiltin_iseq_p(iseq)) + if (invokebuiltin_delegate_leave_p(iseq)) { + return (const struct rb_builtin_function *)iseq->body->iseq_encoded[1]; + } + else { return NULL; - return (const struct rb_builtin_function *)iseq->body->iseq_encoded[1]; + } } VALUE @@ -564,11 +777,17 @@ rb_yjit_str_simple_append(VALUE str1, VALUE str2) } struct rb_control_frame_struct * -rb_get_ec_cfp(rb_execution_context_t *ec) +rb_get_ec_cfp(const rb_execution_context_t *ec) { return ec->cfp; } +const rb_iseq_t * +rb_get_cfp_iseq(struct rb_control_frame_struct *cfp) +{ + return cfp->iseq; +} + VALUE * rb_get_cfp_pc(struct rb_control_frame_struct *cfp) { @@ -593,13 +812,6 @@ rb_set_cfp_sp(struct rb_control_frame_struct *cfp, VALUE *sp) cfp->sp = sp; } -rb_iseq_t * -rb_cfp_get_iseq(struct rb_control_frame_struct *cfp) -{ - // TODO(alan) could assert frame type here to make sure that it's a ruby frame with an iseq. - return (rb_iseq_t*)cfp->iseq; -} - VALUE rb_get_cfp_self(struct rb_control_frame_struct *cfp) { @@ -612,6 +824,19 @@ rb_get_cfp_ep(struct rb_control_frame_struct *cfp) return (VALUE*)cfp->ep; } +const VALUE * +rb_get_cfp_ep_level(struct rb_control_frame_struct *cfp, uint32_t lv) +{ + uint32_t i; + const VALUE *ep = (VALUE*)cfp->ep; + for (i = 0; i < lv; i++) { + ep = VM_ENV_PREV_EP(ep); + } + return ep; +} + +extern VALUE *rb_vm_base_ptr(struct rb_control_frame_struct *cfp); + VALUE rb_yarv_class_of(VALUE obj) { @@ -626,6 +851,12 @@ rb_yarv_str_eql_internal(VALUE str1, VALUE str2) return rb_str_eql_internal(str1, str2); } +VALUE +rb_str_neq_internal(VALUE str1, VALUE str2) +{ + return rb_str_eql_internal(str1, str2) == Qtrue ? Qfalse : Qtrue; +} + // YJIT needs this function to never allocate and never raise VALUE rb_yarv_ary_entry_internal(VALUE ary, long offset) @@ -633,6 +864,83 @@ rb_yarv_ary_entry_internal(VALUE ary, long offset) return rb_ary_entry_internal(ary, offset); } +extern VALUE rb_ary_unshift_m(int argc, VALUE *argv, VALUE ary); + +VALUE +rb_yjit_rb_ary_subseq_length(VALUE ary, long beg) +{ + long len = RARRAY_LEN(ary); + return rb_ary_subseq(ary, beg, len); +} + +VALUE +rb_yjit_fix_div_fix(VALUE recv, VALUE obj) +{ + return rb_fix_div_fix(recv, obj); +} + +VALUE +rb_yjit_fix_mod_fix(VALUE recv, VALUE obj) +{ + return rb_fix_mod_fix(recv, obj); +} + +// Return non-zero when `obj` is an array and its last item is a +// `ruby2_keywords` hash. We don't support this kind of splat. +size_t +rb_yjit_ruby2_keywords_splat_p(VALUE obj) +{ + if (!RB_TYPE_P(obj, T_ARRAY)) return 0; + long len = RARRAY_LEN(obj); + if (len == 0) return 0; + VALUE last = RARRAY_AREF(obj, len - 1); + if (!RB_TYPE_P(last, T_HASH)) return 0; + return FL_TEST_RAW(last, RHASH_PASS_AS_KEYWORDS); +} + +// Checks to establish preconditions for rb_yjit_splat_varg_cfunc() +VALUE +rb_yjit_splat_varg_checks(VALUE *sp, VALUE splat_array, rb_control_frame_t *cfp) +{ + // We inserted a T_ARRAY guard before this call + long len = RARRAY_LEN(splat_array); + + // Large splat arrays need a separate allocation + if (len < 0 || len > VM_ARGC_STACK_MAX) return Qfalse; + + // Would we overflow if we put the contents of the array onto the stack? + if (sp + len > (VALUE *)(cfp - 2)) return Qfalse; + + // Reject keywords hash since that requires duping it sometimes + if (len > 0) { + VALUE last_hash = RARRAY_AREF(splat_array, len - 1); + if (RB_TYPE_P(last_hash, T_HASH) && + FL_TEST_RAW(last_hash, RHASH_PASS_AS_KEYWORDS)) { + return Qfalse; + } + } + + return Qtrue; +} + +// Push array elements to the stack for a C method that has a variable number +// of parameters. Returns the number of arguments the splat array contributes. +int +rb_yjit_splat_varg_cfunc(VALUE *stack_splat_array) +{ + VALUE splat_array = *stack_splat_array; + int len; + + // We already checked that length fits in `int` + RUBY_ASSERT(RB_TYPE_P(splat_array, T_ARRAY)); + len = (int)RARRAY_LEN(splat_array); + + // Push the contents of the array onto the stack + MEMCPY(stack_splat_array, RARRAY_CONST_PTR(splat_array), VALUE, len); + + return len; +} + // Print the Ruby source location of some ISEQ for debugging purposes void rb_yjit_dump_iseq_loc(const rb_iseq_t *iseq, uint32_t insn_idx) @@ -644,6 +952,30 @@ rb_yjit_dump_iseq_loc(const rb_iseq_t *iseq, uint32_t insn_idx) fprintf(stderr, "%s %.*s:%u\n", __func__, (int)len, ptr, rb_iseq_line_no(iseq, insn_idx)); } +// Get the number of digits required to print an integer +static int +num_digits(int integer) +{ + int num = 1; + while (integer /= 10) { + num++; + } + return num; +} + +// Allocate a C string that formats an ISEQ label like iseq_inspect() +char * +rb_yjit_iseq_inspect(const rb_iseq_t *iseq) +{ + const char *label = RSTRING_PTR(iseq->body->location.label); + const char *path = RSTRING_PTR(rb_iseq_path(iseq)); + int lineno = iseq->body->location.code_location.beg_pos.lineno; + + char *buf = ZALLOC_N(char, strlen(label) + strlen(path) + num_digits(lineno) + 3); + sprintf(buf, "%s@%s:%d", label, path, lineno); + return buf; +} + // The FL_TEST() macro VALUE rb_FL_TEST(VALUE obj, VALUE flags) @@ -681,7 +1013,7 @@ rb_RSTRUCT_SET(VALUE st, int k, VALUE v) } const struct rb_callinfo * -rb_get_call_data_ci(struct rb_call_data *cd) +rb_get_call_data_ci(const struct rb_call_data *cd) { return cd->ci; } @@ -732,13 +1064,17 @@ rb_assert_cme_handle(VALUE handle) RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(handle, imemo_ment)); } -typedef void (*iseq_callback)(const rb_iseq_t *); +// Used for passing a callback and other data over rb_objspace_each_objects +struct iseq_callback_data { + rb_iseq_callback callback; + void *data; +}; // Heap-walking callback for rb_yjit_for_each_iseq(). static int for_each_iseq_i(void *vstart, void *vend, size_t stride, void *data) { - const iseq_callback callback = (iseq_callback)data; + const struct iseq_callback_data *callback_data = (struct iseq_callback_data *)data; VALUE v = (VALUE)vstart; for (; v != (VALUE)vend; v += stride) { void *ptr = asan_poisoned_object_p(v); @@ -746,7 +1082,7 @@ for_each_iseq_i(void *vstart, void *vend, size_t stride, void *data) if (rb_obj_is_iseq(v)) { rb_iseq_t *iseq = (rb_iseq_t *)v; - callback(iseq); + callback_data->callback(iseq, callback_data->data); } asan_poison_object_if(ptr, v); @@ -757,9 +1093,10 @@ for_each_iseq_i(void *vstart, void *vend, size_t stride, void *data) // Iterate through the whole GC heap and invoke a callback for each iseq. // Used for global code invalidation. void -rb_yjit_for_each_iseq(iseq_callback callback) +rb_yjit_for_each_iseq(rb_iseq_callback callback, void *data) { - rb_objspace_each_objects(for_each_iseq_i, (void *)callback); + struct iseq_callback_data callback_data = { .callback = callback, .data = data }; + rb_objspace_each_objects(for_each_iseq_i, (void *)&callback_data); } // For running write barriers from Rust. Required when we add a new edge in the @@ -789,30 +1126,24 @@ rb_yjit_vm_unlock(unsigned int *recursive_lock_level, const char *file, int line rb_vm_lock_leave(recursive_lock_level, file, line); } -// Pointer to a YJIT entry point (machine code generated by YJIT) -typedef VALUE (*yjit_func_t)(rb_execution_context_t *, rb_control_frame_t *); - -bool -rb_yjit_compile_iseq(const rb_iseq_t *iseq, rb_execution_context_t *ec) +void +rb_yjit_compile_iseq(const rb_iseq_t *iseq, rb_execution_context_t *ec, bool jit_exception) { - bool success = true; RB_VM_LOCK_ENTER(); rb_vm_barrier(); - // Compile a block version starting at the first instruction - uint8_t *rb_yjit_iseq_gen_entry_point(const rb_iseq_t *iseq, rb_execution_context_t *ec); // defined in Rust - uint8_t *code_ptr = rb_yjit_iseq_gen_entry_point(iseq, ec); + // Compile a block version starting at the current instruction + uint8_t *rb_yjit_iseq_gen_entry_point(const rb_iseq_t *iseq, rb_execution_context_t *ec, bool jit_exception); // defined in Rust + uint8_t *code_ptr = rb_yjit_iseq_gen_entry_point(iseq, ec, jit_exception); - if (code_ptr) { - iseq->body->jit_func = (yjit_func_t)code_ptr; + if (jit_exception) { + iseq->body->jit_exception = (rb_jit_func_t)code_ptr; } else { - iseq->body->jit_func = 0; - success = false; + iseq->body->jit_entry = (rb_jit_func_t)code_ptr; } RB_VM_LOCK_LEAVE(); - return success; } // GC root for interacting with the GC @@ -858,28 +1189,86 @@ rb_yjit_invalidate_all_method_lookup_assumptions(void) // method caches, so we do nothing here for now. } +// Number of object shapes, which might be useful for investigating YJIT exit reasons. +static VALUE +object_shape_count(rb_execution_context_t *ec, VALUE self) +{ + // next_shape_id starts from 0, so it's the same as the count + return ULONG2NUM((unsigned long)GET_SHAPE_TREE()->next_shape_id); +} + +// Assert that we have the VM lock. Relevant mostly for multi ractor situations. +// The GC takes the lock before calling us, and this asserts that it indeed happens. +void +rb_yjit_assert_holding_vm_lock(void) +{ + ASSERT_vm_locking(); +} + +// The number of stack slots that vm_sendish() pops for send and invokesuper. +size_t +rb_yjit_sendish_sp_pops(const struct rb_callinfo *ci) +{ + return 1 - sp_inc_of_sendish(ci); // + 1 to ignore return value push +} + +// The number of stack slots that vm_sendish() pops for invokeblock. +size_t +rb_yjit_invokeblock_sp_pops(const struct rb_callinfo *ci) +{ + return 1 - sp_inc_of_invokeblock(ci); // + 1 to ignore return value push +} + +// Setup jit_return to avoid returning a non-Qundef value on a non-FINISH frame. +// See [jit_compile_exception] for details. +void +rb_yjit_set_exception_return(rb_control_frame_t *cfp, void *leave_exit, void *leave_exception) +{ + if (VM_FRAME_FINISHED_P(cfp)) { + // If it's a FINISH frame, just normally exit with a non-Qundef value. + cfp->jit_return = leave_exit; + } + else if (cfp->jit_return) { + while (!VM_FRAME_FINISHED_P(cfp)) { + if (cfp->jit_return == leave_exit) { + // Unlike jit_exec(), leave_exit is not safe on a non-FINISH frame on + // jit_exec_exception(). See [jit_exec] and [jit_exec_exception] for + // details. Exit to the interpreter with Qundef to let it keep executing + // other Ruby frames. + cfp->jit_return = leave_exception; + return; + } + cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp); + } + } + else { + // If the caller was not JIT code, exit to the interpreter with Qundef + // to keep executing Ruby frames with the interpreter. + cfp->jit_return = leave_exception; + } +} + // Primitives used by yjit.rb VALUE rb_yjit_stats_enabled_p(rb_execution_context_t *ec, VALUE self); -VALUE rb_yjit_get_stats(rb_execution_context_t *ec, VALUE self); +VALUE rb_yjit_print_stats_p(rb_execution_context_t *ec, VALUE self); +VALUE rb_yjit_trace_exit_locations_enabled_p(rb_execution_context_t *ec, VALUE self); +VALUE rb_yjit_get_stats(rb_execution_context_t *ec, VALUE self, VALUE context); VALUE rb_yjit_reset_stats_bang(rb_execution_context_t *ec, VALUE self); VALUE rb_yjit_disasm_iseq(rb_execution_context_t *ec, VALUE self, VALUE iseq); VALUE rb_yjit_insns_compiled(rb_execution_context_t *ec, VALUE self, VALUE iseq); +VALUE rb_yjit_code_gc(rb_execution_context_t *ec, VALUE self); VALUE rb_yjit_simulate_oom_bang(rb_execution_context_t *ec, VALUE self); -VALUE rb_yjit_get_stats(rb_execution_context_t *ec, VALUE self); +VALUE rb_yjit_get_exit_locations(rb_execution_context_t *ec, VALUE self); +VALUE rb_yjit_enable(rb_execution_context_t *ec, VALUE self, VALUE gen_stats, VALUE print_stats); // Preprocessed yjit.rb generated during build #include "yjit.rbinc" -// Can raise RuntimeError +// Initialize the GC hooks void -rb_yjit_init(void) +rb_yjit_init_gc_hooks(void) { - // Call the Rust initialization code - void rb_yjit_init_rust(void); - rb_yjit_init_rust(); - - // Initialize the GC hooks. Do this second as some code depend on Rust initialization. struct yjit_root_struct *root; VALUE yjit_root = TypedData_Make_Struct(0, struct yjit_root_struct, &yjit_root_type, root); - rb_gc_register_mark_object(yjit_root); + rb_vm_register_global_object(yjit_root); } |