diff options
Diffstat (limited to 'rjit_c.c')
-rw-r--r-- | rjit_c.c | 542 |
1 files changed, 542 insertions, 0 deletions
diff --git a/rjit_c.c b/rjit_c.c new file mode 100644 index 0000000000..e6d8d5da5c --- /dev/null +++ b/rjit_c.c @@ -0,0 +1,542 @@ +/********************************************************************** + + rjit_c.c - C helpers for RJIT + + Copyright (C) 2017 Takashi Kokubun <k0kubun@ruby-lang.org>. + +**********************************************************************/ + +#include "rjit.h" // defines USE_RJIT + +#if USE_RJIT + +#include "rjit_c.h" +#include "include/ruby/assert.h" +#include "include/ruby/debug.h" +#include "internal.h" +#include "internal/compile.h" +#include "internal/fixnum.h" +#include "internal/hash.h" +#include "internal/sanitizers.h" +#include "internal/gc.h" +#include "internal/proc.h" +#include "yjit.h" +#include "vm_insnhelper.h" +#include "probes.h" +#include "probes_helper.h" + +#include "insns.inc" +#include "insns_info.inc" + +// For mmapp(), sysconf() +#ifndef _WIN32 +#include <unistd.h> +#include <sys/mman.h> +#endif + +#include <errno.h> + +#if defined(MAP_FIXED_NOREPLACE) && defined(_SC_PAGESIZE) +// Align the current write position to a multiple of bytes +static uint8_t * +align_ptr(uint8_t *ptr, uint32_t multiple) +{ + // Compute the pointer modulo the given alignment boundary + uint32_t rem = ((uint32_t)(uintptr_t)ptr) % multiple; + + // If the pointer is already aligned, stop + if (rem == 0) + return ptr; + + // Pad the pointer by the necessary amount to align it + uint32_t pad = multiple - rem; + + return ptr + pad; +} +#endif + +// Address space reservation. Memory pages are mapped on an as needed basis. +// See the Rust mm module for details. +static uint8_t * +rjit_reserve_addr_space(uint32_t mem_size) +{ +#ifndef _WIN32 + uint8_t *mem_block; + + // On Linux + #if defined(MAP_FIXED_NOREPLACE) && defined(_SC_PAGESIZE) + uint32_t const page_size = (uint32_t)sysconf(_SC_PAGESIZE); + uint8_t *const cfunc_sample_addr = (void *)&rjit_reserve_addr_space; + uint8_t *const probe_region_end = cfunc_sample_addr + INT32_MAX; + // Align the requested address to page size + uint8_t *req_addr = align_ptr(cfunc_sample_addr, page_size); + + // Probe for addresses close to this function using MAP_FIXED_NOREPLACE + // to improve odds of being in range for 32-bit relative call instructions. + do { + mem_block = mmap( + req_addr, + mem_size, + PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, + -1, + 0 + ); + + // If we succeeded, stop + if (mem_block != MAP_FAILED) { + break; + } + + // +4MB + req_addr += 4 * 1024 * 1024; + } while (req_addr < probe_region_end); + + // On MacOS and other platforms + #else + // Try to map a chunk of memory as executable + mem_block = mmap( + (void *)rjit_reserve_addr_space, + mem_size, + PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, + 0 + ); + #endif + + // Fallback + if (mem_block == MAP_FAILED) { + // Try again without the address hint (e.g., valgrind) + mem_block = mmap( + NULL, + mem_size, + PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, + 0 + ); + } + + // Check that the memory mapping was successful + if (mem_block == MAP_FAILED) { + perror("ruby: yjit: mmap:"); + if(errno == ENOMEM) { + // No crash report if it's only insufficient memory + exit(EXIT_FAILURE); + } + rb_bug("mmap failed"); + } + + return mem_block; +#else + // Windows not supported for now + return NULL; +#endif +} + +static VALUE +mprotect_write(rb_execution_context_t *ec, VALUE self, VALUE rb_mem_block, VALUE rb_mem_size) +{ + void *mem_block = (void *)NUM2SIZET(rb_mem_block); + uint32_t mem_size = NUM2UINT(rb_mem_size); + return RBOOL(mprotect(mem_block, mem_size, PROT_READ | PROT_WRITE) == 0); +} + +static VALUE +mprotect_exec(rb_execution_context_t *ec, VALUE self, VALUE rb_mem_block, VALUE rb_mem_size) +{ + void *mem_block = (void *)NUM2SIZET(rb_mem_block); + uint32_t mem_size = NUM2UINT(rb_mem_size); + if (mem_size == 0) return Qfalse; // Some platforms return an error for mem_size 0. + + if (mprotect(mem_block, mem_size, PROT_READ | PROT_EXEC)) { + rb_bug("Couldn't make JIT page (%p, %lu bytes) executable, errno: %s", + mem_block, (unsigned long)mem_size, strerror(errno)); + } + return Qtrue; +} + +static VALUE +rjit_optimized_call(VALUE *recv, rb_execution_context_t *ec, int argc, VALUE *argv, int kw_splat, VALUE block_handler) +{ + rb_proc_t *proc; + GetProcPtr(recv, proc); + return rb_vm_invoke_proc(ec, proc, argc, argv, kw_splat, block_handler); +} + +static VALUE +rjit_str_neq_internal(VALUE str1, VALUE str2) +{ + return rb_str_eql_internal(str1, str2) == Qtrue ? Qfalse : Qtrue; +} + +static VALUE +rjit_str_simple_append(VALUE str1, VALUE str2) +{ + return rb_str_cat(str1, RSTRING_PTR(str2), RSTRING_LEN(str2)); +} + +static VALUE +rjit_rb_ary_subseq_length(VALUE ary, long beg) +{ + long len = RARRAY_LEN(ary); + return rb_ary_subseq(ary, beg, len); +} + +static VALUE +rjit_build_kwhash(const struct rb_callinfo *ci, VALUE *sp) +{ + const struct rb_callinfo_kwarg *kw_arg = vm_ci_kwarg(ci); + int kw_len = kw_arg->keyword_len; + VALUE hash = rb_hash_new_with_size(kw_len); + + for (int i = 0; i < kw_len; i++) { + VALUE key = kw_arg->keywords[i]; + VALUE val = *(sp - kw_len + i); + rb_hash_aset(hash, key, val); + } + return hash; +} + +// The code we generate in gen_send_cfunc() doesn't fire the c_return TracePoint event +// like the interpreter. When tracing for c_return is enabled, we patch the code after +// the C method return to call into this to fire the event. +static void +rjit_full_cfunc_return(rb_execution_context_t *ec, VALUE return_value) +{ + rb_control_frame_t *cfp = ec->cfp; + RUBY_ASSERT_ALWAYS(cfp == GET_EC()->cfp); + const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(cfp); + + RUBY_ASSERT_ALWAYS(RUBYVM_CFUNC_FRAME_P(cfp)); + RUBY_ASSERT_ALWAYS(me->def->type == VM_METHOD_TYPE_CFUNC); + + // CHECK_CFP_CONSISTENCY("full_cfunc_return"); TODO revive this + + // Pop the C func's frame and fire the c_return TracePoint event + // Note that this is the same order as vm_call_cfunc_with_frame(). + rb_vm_pop_frame(ec); + EXEC_EVENT_HOOK(ec, RUBY_EVENT_C_RETURN, cfp->self, me->def->original_id, me->called_id, me->owner, return_value); + // Note, this deviates from the interpreter in that users need to enable + // a c_return TracePoint for this DTrace hook to work. A reasonable change + // since the Ruby return event works this way as well. + RUBY_DTRACE_CMETHOD_RETURN_HOOK(ec, me->owner, me->def->original_id); + + // Push return value into the caller's stack. We know that it's a frame that + // uses cfp->sp because we are patching a call done with gen_send_cfunc(). + ec->cfp->sp[0] = return_value; + ec->cfp->sp++; +} + +static rb_proc_t * +rjit_get_proc_ptr(VALUE procv) +{ + rb_proc_t *proc; + GetProcPtr(procv, proc); + return proc; +} + +// Use the same buffer size as Stackprof. +#define BUFF_LEN 2048 + +extern VALUE rb_rjit_raw_samples; +extern VALUE rb_rjit_line_samples; + +static void +rjit_record_exit_stack(const VALUE *exit_pc) +{ + // Let Primitive.rjit_stop_stats stop this + if (!rb_rjit_call_p) return; + + // Get the opcode from the encoded insn handler at this PC + int insn = rb_vm_insn_addr2opcode((void *)*exit_pc); + + // Create 2 array buffers to be used to collect frames and lines. + VALUE frames_buffer[BUFF_LEN] = { 0 }; + int lines_buffer[BUFF_LEN] = { 0 }; + + // Records call frame and line information for each method entry into two + // temporary buffers. Returns the number of times we added to the buffer (ie + // the length of the stack). + // + // Call frame info is stored in the frames_buffer, line number information + // in the lines_buffer. The first argument is the start point and the second + // argument is the buffer limit, set at 2048. + int stack_length = rb_profile_frames(0, BUFF_LEN, frames_buffer, lines_buffer); + int samples_length = stack_length + 3; // 3: length, insn, count + + // If yjit_raw_samples is less than or equal to the current length of the samples + // we might have seen this stack trace previously. + int prev_stack_len_index = (int)RARRAY_LEN(rb_rjit_raw_samples) - samples_length; + VALUE prev_stack_len_obj; + if (RARRAY_LEN(rb_rjit_raw_samples) >= samples_length && FIXNUM_P(prev_stack_len_obj = RARRAY_AREF(rb_rjit_raw_samples, prev_stack_len_index))) { + int prev_stack_len = NUM2INT(prev_stack_len_obj); + int idx = stack_length - 1; + int prev_frame_idx = 0; + bool seen_already = true; + + // If the previous stack length and current stack length are equal, + // loop and compare the current frame to the previous frame. If they are + // not equal, set seen_already to false and break out of the loop. + if (prev_stack_len == stack_length) { + while (idx >= 0) { + VALUE current_frame = frames_buffer[idx]; + VALUE prev_frame = RARRAY_AREF(rb_rjit_raw_samples, prev_stack_len_index + prev_frame_idx + 1); + + // If the current frame and previous frame are not equal, set + // seen_already to false and break out of the loop. + if (current_frame != prev_frame) { + seen_already = false; + break; + } + + idx--; + prev_frame_idx++; + } + + // If we know we've seen this stack before, increment the counter by 1. + if (seen_already) { + int prev_idx = (int)RARRAY_LEN(rb_rjit_raw_samples) - 1; + int prev_count = NUM2INT(RARRAY_AREF(rb_rjit_raw_samples, prev_idx)); + int new_count = prev_count + 1; + + rb_ary_store(rb_rjit_raw_samples, prev_idx, INT2NUM(new_count)); + rb_ary_store(rb_rjit_line_samples, prev_idx, INT2NUM(new_count)); + return; + } + } + } + + rb_ary_push(rb_rjit_raw_samples, INT2NUM(stack_length)); + rb_ary_push(rb_rjit_line_samples, INT2NUM(stack_length)); + + int idx = stack_length - 1; + + while (idx >= 0) { + VALUE frame = frames_buffer[idx]; + int line = lines_buffer[idx]; + + rb_ary_push(rb_rjit_raw_samples, frame); + rb_ary_push(rb_rjit_line_samples, INT2NUM(line)); + + idx--; + } + + // Push the insn value into the yjit_raw_samples Vec. + rb_ary_push(rb_rjit_raw_samples, INT2NUM(insn)); + + // Push the current line onto the yjit_line_samples Vec. This + // points to the line in insns.def. + int line = (int)RARRAY_LEN(rb_rjit_line_samples) - 1; + rb_ary_push(rb_rjit_line_samples, INT2NUM(line)); + + // Push number of times seen onto the stack, which is 1 + // because it's the first time we've seen it. + rb_ary_push(rb_rjit_raw_samples, INT2NUM(1)); + rb_ary_push(rb_rjit_line_samples, INT2NUM(1)); +} + +// For a given raw_sample (frame), set the hash with the caller's +// name, file, and line number. Return the hash with collected frame_info. +static void +rjit_add_frame(VALUE hash, VALUE frame) +{ + VALUE frame_id = SIZET2NUM(frame); + + if (RTEST(rb_hash_aref(hash, frame_id))) { + return; + } + else { + VALUE frame_info = rb_hash_new(); + // Full label for the frame + VALUE name = rb_profile_frame_full_label(frame); + // Absolute path of the frame from rb_iseq_realpath + VALUE file = rb_profile_frame_absolute_path(frame); + // Line number of the frame + VALUE line = rb_profile_frame_first_lineno(frame); + + // If absolute path isn't available use the rb_iseq_path + if (NIL_P(file)) { + file = rb_profile_frame_path(frame); + } + + rb_hash_aset(frame_info, ID2SYM(rb_intern("name")), name); + rb_hash_aset(frame_info, ID2SYM(rb_intern("file")), file); + rb_hash_aset(frame_info, ID2SYM(rb_intern("samples")), INT2NUM(0)); + rb_hash_aset(frame_info, ID2SYM(rb_intern("total_samples")), INT2NUM(0)); + rb_hash_aset(frame_info, ID2SYM(rb_intern("edges")), rb_hash_new()); + rb_hash_aset(frame_info, ID2SYM(rb_intern("lines")), rb_hash_new()); + + if (line != INT2FIX(0)) { + rb_hash_aset(frame_info, ID2SYM(rb_intern("line")), line); + } + + rb_hash_aset(hash, frame_id, frame_info); + } +} + +static VALUE +rjit_exit_traces(void) +{ + int samples_len = (int)RARRAY_LEN(rb_rjit_raw_samples); + RUBY_ASSERT(samples_len == RARRAY_LEN(rb_rjit_line_samples)); + + VALUE result = rb_hash_new(); + VALUE raw_samples = rb_ary_new_capa(samples_len); + VALUE line_samples = rb_ary_new_capa(samples_len); + VALUE frames = rb_hash_new(); + int idx = 0; + + // While the index is less than samples_len, parse yjit_raw_samples and + // yjit_line_samples, then add casted values to raw_samples and line_samples array. + while (idx < samples_len) { + int num = NUM2INT(RARRAY_AREF(rb_rjit_raw_samples, idx)); + int line_num = NUM2INT(RARRAY_AREF(rb_rjit_line_samples, idx)); + idx++; + + rb_ary_push(raw_samples, SIZET2NUM(num)); + rb_ary_push(line_samples, INT2NUM(line_num)); + + // Loop through the length of samples_len and add data to the + // frames hash. Also push the current value onto the raw_samples + // and line_samples array respectively. + for (int o = 0; o < num; o++) { + rjit_add_frame(frames, RARRAY_AREF(rb_rjit_raw_samples, idx)); + rb_ary_push(raw_samples, SIZET2NUM(RARRAY_AREF(rb_rjit_raw_samples, idx))); + rb_ary_push(line_samples, RARRAY_AREF(rb_rjit_line_samples, idx)); + idx++; + } + + // insn BIN and lineno + rb_ary_push(raw_samples, RARRAY_AREF(rb_rjit_raw_samples, idx)); + rb_ary_push(line_samples, RARRAY_AREF(rb_rjit_line_samples, idx)); + idx++; + + // Number of times seen + rb_ary_push(raw_samples, RARRAY_AREF(rb_rjit_raw_samples, idx)); + rb_ary_push(line_samples, RARRAY_AREF(rb_rjit_line_samples, idx)); + idx++; + } + + // Set add the raw_samples, line_samples, and frames to the results + // hash. + rb_hash_aset(result, ID2SYM(rb_intern("raw")), raw_samples); + rb_hash_aset(result, ID2SYM(rb_intern("lines")), line_samples); + rb_hash_aset(result, ID2SYM(rb_intern("frames")), frames); + + return result; +} + +// An offsetof implementation that works for unnamed struct and union. +// Multiplying 8 for compatibility with libclang's offsetof. +#define OFFSETOF(ptr, member) RB_SIZE2NUM(((char *)&ptr.member - (char*)&ptr) * 8) + +#define SIZEOF(type) RB_SIZE2NUM(sizeof(type)) +#define SIGNED_TYPE_P(type) RBOOL((type)(-1) < (type)(1)) + +// Insn side exit counters +static size_t rjit_insn_exits[VM_INSTRUCTION_SIZE] = { 0 }; + +// macOS: brew install capstone +// Ubuntu/Debian: apt-get install libcapstone-dev +// Fedora: dnf -y install capstone-devel +#ifdef HAVE_LIBCAPSTONE +#include <capstone/capstone.h> +#endif + +// Return an array of [address, mnemonic, op_str] +static VALUE +dump_disasm(rb_execution_context_t *ec, VALUE self, VALUE from, VALUE to, VALUE test) +{ + VALUE result = rb_ary_new(); +#ifdef HAVE_LIBCAPSTONE + // Prepare for calling cs_disasm + static csh handle; + if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle) != CS_ERR_OK) { + rb_raise(rb_eRuntimeError, "failed to make Capstone handle"); + } + size_t from_addr = NUM2SIZET(from); + size_t to_addr = NUM2SIZET(to); + + // Call cs_disasm and convert results to a Ruby array + cs_insn *insns; + size_t base_addr = RTEST(test) ? 0 : from_addr; // On tests, start from 0 for output stability. + size_t count = cs_disasm(handle, (const uint8_t *)from_addr, to_addr - from_addr, base_addr, 0, &insns); + for (size_t i = 0; i < count; i++) { + VALUE vals = rb_ary_new_from_args(3, LONG2NUM(insns[i].address), rb_str_new2(insns[i].mnemonic), rb_str_new2(insns[i].op_str)); + rb_ary_push(result, vals); + } + + // Free memory used by capstone + cs_free(insns, count); + cs_close(&handle); +#endif + return result; +} + +// Same as `RubyVM::RJIT.enabled?`, but this is used before it's defined. +static VALUE +rjit_enabled_p(rb_execution_context_t *ec, VALUE self) +{ + return RBOOL(rb_rjit_enabled); +} + +static int +for_each_iseq_i(void *vstart, void *vend, size_t stride, void *data) +{ + VALUE block = (VALUE)data; + VALUE v = (VALUE)vstart; + for (; v != (VALUE)vend; v += stride) { + void *ptr = asan_poisoned_object_p(v); + asan_unpoison_object(v, false); + + if (rb_obj_is_iseq(v)) { + extern VALUE rb_rjit_iseq_new(rb_iseq_t *iseq); + rb_iseq_t *iseq = (rb_iseq_t *)v; + rb_funcall(block, rb_intern("call"), 1, rb_rjit_iseq_new(iseq)); + } + + asan_poison_object_if(ptr, v); + } + return 0; +} + +static VALUE +rjit_for_each_iseq(rb_execution_context_t *ec, VALUE self, VALUE block) +{ + rb_objspace_each_objects(for_each_iseq_i, (void *)block); + return Qnil; +} + +// bindgen references +extern ID rb_get_symbol_id(VALUE name); +extern VALUE rb_fix_aref(VALUE fix, VALUE idx); +extern VALUE rb_str_getbyte(VALUE str, VALUE index); +extern VALUE rb_vm_concat_array(VALUE ary1, VALUE ary2st); +extern VALUE rb_vm_get_ev_const(rb_execution_context_t *ec, VALUE orig_klass, ID id, VALUE allow_nil); +extern VALUE rb_vm_getclassvariable(const rb_iseq_t *iseq, const rb_control_frame_t *cfp, ID id, ICVARC ic); +extern VALUE rb_vm_opt_newarray_min(rb_execution_context_t *ec, rb_num_t num, const VALUE *ptr); +extern VALUE rb_vm_opt_newarray_max(rb_execution_context_t *ec, rb_num_t num, const VALUE *ptr); +extern VALUE rb_vm_opt_newarray_hash(rb_execution_context_t *ec, rb_num_t num, const VALUE *ptr); +extern VALUE rb_vm_splat_array(VALUE flag, VALUE array); +extern bool rb_simple_iseq_p(const rb_iseq_t *iseq); +extern bool rb_vm_defined(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, rb_num_t op_type, VALUE obj, VALUE v); +extern bool rb_vm_ic_hit_p(IC ic, const VALUE *reg_ep); +extern rb_event_flag_t rb_rjit_global_events; +extern void rb_vm_setinstancevariable(const rb_iseq_t *iseq, VALUE obj, ID id, VALUE val, IVC ic); +extern VALUE rb_vm_throw(const rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, rb_num_t throw_state, VALUE throwobj); +extern VALUE rb_reg_new_ary(VALUE ary, int opt); +extern void rb_vm_setclassvariable(const rb_iseq_t *iseq, const rb_control_frame_t *cfp, ID id, VALUE val, ICVARC ic); +extern VALUE rb_str_bytesize(VALUE str); +extern const rb_callable_method_entry_t *rb_callable_method_entry_or_negative(VALUE klass, ID mid); +extern VALUE rb_vm_yield_with_cfunc(rb_execution_context_t *ec, const struct rb_captured_block *captured, int argc, const VALUE *argv); +extern VALUE rb_vm_set_ivar_id(VALUE obj, ID id, VALUE val); +extern VALUE rb_ary_unshift_m(int argc, VALUE *argv, VALUE ary); +extern void* rb_rjit_entry_stub_hit(VALUE branch_stub); +extern void* rb_rjit_branch_stub_hit(VALUE branch_stub, int sp_offset, int target0_p); +extern uint64_t rb_vm_insns_count; + +#include "rjit_c.rbinc" + +#endif // USE_RJIT |