diff options
Diffstat (limited to 'yjit/src/cruby.rs')
-rw-r--r-- | yjit/src/cruby.rs | 170 |
1 files changed, 137 insertions, 33 deletions
diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs index f4a6956926..0709e2a079 100644 --- a/yjit/src/cruby.rs +++ b/yjit/src/cruby.rs @@ -83,7 +83,7 @@ #![allow(non_upper_case_globals)] use std::convert::From; -use std::ffi::CString; +use std::ffi::{CString, CStr}; use std::os::raw::{c_char, c_int, c_uint}; use std::panic::{catch_unwind, UnwindSafe}; @@ -96,7 +96,7 @@ pub type size_t = u64; pub type RedefinitionFlag = u32; #[allow(dead_code)] -#[allow(clippy::useless_transmute)] +#[allow(clippy::all)] mod autogened { use super::*; // Textually include output from rust-bindgen as suggested by its user guide. @@ -107,13 +107,26 @@ pub use autogened::*; // TODO: For #defines that affect memory layout, we need to check for them // on build and fail if they're wrong. e.g. USE_FLONUM *must* be true. -// These are functions we expose from vm_insnhelper.c, not in any header. +// These are functions we expose from C files, not in any header. // Parsing it would result in a lot of duplicate definitions. // Use bindgen for functions that are defined in headers or in yjit.c. #[cfg_attr(test, allow(unused))] // We don't link against C code when testing extern "C" { + pub fn rb_check_overloaded_cme( + me: *const rb_callable_method_entry_t, + ci: *const rb_callinfo, + ) -> *const rb_callable_method_entry_t; + + // Floats within range will be encoded without creating objects in the heap. + // (Range is 0x3000000000000001 to 0x4fffffffffffffff (1.7272337110188893E-77 to 2.3158417847463237E+77). + pub fn rb_float_new(d: f64) -> VALUE; + + pub fn rb_hash_empty_p(hash: VALUE) -> VALUE; + pub fn rb_yjit_str_concat_codepoint(str: VALUE, codepoint: VALUE); + pub fn rb_str_setbyte(str: VALUE, index: VALUE, value: VALUE) -> VALUE; pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE; pub fn rb_vm_concat_array(ary1: VALUE, ary2st: VALUE) -> VALUE; + pub fn rb_vm_concat_to_array(ary1: VALUE, ary2st: VALUE) -> VALUE; pub fn rb_vm_defined( ec: EcPtr, reg_cfp: CfpPtr, @@ -135,19 +148,20 @@ extern "C" { ic: ICVARC, ) -> VALUE; pub fn rb_vm_ic_hit_p(ic: IC, reg_ep: *const VALUE) -> bool; - pub fn rb_str_bytesize(str: VALUE) -> VALUE; + pub fn rb_vm_stack_canary() -> VALUE; + pub fn rb_vm_push_cfunc_frame(cme: *const rb_callable_method_entry_t, recv_idx: c_int); } // Renames pub use rb_insn_name as raw_insn_name; -pub use rb_insn_len as raw_insn_len; -pub use rb_yarv_class_of as CLASS_OF; pub use rb_get_ec_cfp as get_ec_cfp; +pub use rb_get_cfp_iseq as get_cfp_iseq; pub use rb_get_cfp_pc as get_cfp_pc; pub use rb_get_cfp_sp as get_cfp_sp; pub use rb_get_cfp_self as get_cfp_self; pub use rb_get_cfp_ep as get_cfp_ep; pub use rb_get_cfp_ep_level as get_cfp_ep_level; +pub use rb_vm_base_ptr as get_cfp_bp; pub use rb_get_cme_def_type as get_cme_def_type; pub use rb_get_cme_def_body_attr_id as get_cme_def_body_attr_id; pub use rb_get_cme_def_body_optimized_type as get_cme_def_body_optimized_type; @@ -162,11 +176,11 @@ pub use rb_iseq_encoded_size as get_iseq_encoded_size; pub use rb_get_iseq_body_local_iseq as get_iseq_body_local_iseq; pub use rb_get_iseq_body_iseq_encoded as get_iseq_body_iseq_encoded; pub use rb_get_iseq_body_stack_max as get_iseq_body_stack_max; +pub use rb_get_iseq_body_type as get_iseq_body_type; pub use rb_get_iseq_flags_has_lead as get_iseq_flags_has_lead; pub use rb_get_iseq_flags_has_opt as get_iseq_flags_has_opt; pub use rb_get_iseq_flags_has_kw as get_iseq_flags_has_kw; pub use rb_get_iseq_flags_has_rest as get_iseq_flags_has_rest; -pub use rb_get_iseq_flags_ruby2_keywords as get_iseq_flags_ruby2_keywords; pub use rb_get_iseq_flags_has_post as get_iseq_flags_has_post; pub use rb_get_iseq_flags_has_kwrest as get_iseq_flags_has_kwrest; pub use rb_get_iseq_flags_has_block as get_iseq_flags_has_block; @@ -183,7 +197,8 @@ pub use rb_get_cikw_keywords_idx as get_cikw_keywords_idx; pub use rb_get_call_data_ci as get_call_data_ci; pub use rb_yarv_str_eql_internal as rb_str_eql_internal; pub use rb_yarv_ary_entry_internal as rb_ary_entry_internal; -pub use rb_yarv_fix_mod_fix as rb_fix_mod_fix; +pub use rb_yjit_fix_div_fix as rb_fix_div_fix; +pub use rb_yjit_fix_mod_fix as rb_fix_mod_fix; pub use rb_FL_TEST as FL_TEST; pub use rb_FL_TEST_RAW as FL_TEST_RAW; pub use rb_RB_TYPE_P as RB_TYPE_P; @@ -199,8 +214,6 @@ pub use rb_RCLASS_ORIGIN as RCLASS_ORIGIN; /// Helper so we can get a Rust string for insn_name() pub fn insn_name(opcode: usize) -> String { - use std::ffi::CStr; - unsafe { // Look up Ruby's NULL-terminated insn name string let op_name = raw_insn_name(VALUE(opcode)); @@ -220,7 +233,7 @@ pub fn insn_len(opcode: usize) -> u32 { #[cfg(not(test))] unsafe { - raw_insn_len(VALUE(opcode)).try_into().unwrap() + rb_insn_len(VALUE(opcode)).try_into().unwrap() } } @@ -243,6 +256,30 @@ pub struct VALUE(pub usize); /// Pointer to an ISEQ pub type IseqPtr = *const rb_iseq_t; +// Given an ISEQ pointer, convert PC to insn_idx +pub fn iseq_pc_to_insn_idx(iseq: IseqPtr, pc: *mut VALUE) -> Option<u16> { + let pc_zero = unsafe { rb_iseq_pc_at_idx(iseq, 0) }; + unsafe { pc.offset_from(pc_zero) }.try_into().ok() +} + +/// Given an ISEQ pointer and an instruction index, return an opcode. +pub fn iseq_opcode_at_idx(iseq: IseqPtr, insn_idx: u32) -> u32 { + let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) }; + unsafe { rb_iseq_opcode_at_pc(iseq, pc) as u32 } +} + +/// Return a poison value to be set above the stack top to verify leafness. +#[cfg(not(test))] +pub fn vm_stack_canary() -> u64 { + unsafe { rb_vm_stack_canary() }.as_u64() +} + +/// Avoid linking the C function in `cargo test` +#[cfg(test)] +pub fn vm_stack_canary() -> u64 { + 0 +} + /// Opaque execution-context type from vm_core.h #[repr(C)] pub struct rb_execution_context_struct { @@ -277,13 +314,6 @@ pub struct rb_callcache { _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, } -/// Opaque call-info type from vm_callinfo.h -#[repr(C)] -pub struct rb_callinfo_kwarg { - _data: [u8; 0], - _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, -} - /// Opaque control_frame (CFP) struct from vm_core.h #[repr(C)] pub struct rb_control_frame_struct { @@ -367,6 +397,11 @@ impl VALUE { } } + /// Returns true if the value is T_HASH + pub fn hash_p(self) -> bool { + !self.special_const_p() && self.builtin_type() == RUBY_T_HASH + } + /// Returns true or false depending on whether the value is nil pub fn nil_p(self) -> bool { self == Qnil @@ -391,7 +426,13 @@ impl VALUE { } pub fn class_of(self) -> VALUE { - unsafe { CLASS_OF(self) } + if !self.special_const_p() { + let builtin_type = self.builtin_type(); + assert_ne!(builtin_type, RUBY_T_NONE, "YJIT should only see live objects"); + assert_ne!(builtin_type, RUBY_T_MOVED, "YJIT should only see live objects"); + } + + unsafe { rb_yarv_class_of(self) } } pub fn is_frozen(self) -> bool { @@ -463,7 +504,7 @@ impl VALUE { us as *mut T } - /// For working with opague pointers and encoding null check. + /// For working with opaque pointers and encoding null check. /// Similar to [std::ptr::NonNull], but for `*const T`. `NonNull<T>` /// is for `*mut T` while our C functions are setup to use `*const T`. /// Casting from `NonNull<T>` to `*const T` is too noisy. @@ -564,24 +605,38 @@ pub fn rust_str_to_ruby(str: &str) -> VALUE { pub fn rust_str_to_sym(str: &str) -> VALUE { let c_str = CString::new(str).unwrap(); let c_ptr: *const c_char = c_str.as_ptr(); - unsafe { rb_id2sym(rb_intern(c_ptr)) } } +/// Produce an owned Rust String from a C char pointer +pub fn cstr_to_rust_string(c_char_ptr: *const c_char) -> Option<String> { + assert!(c_char_ptr != std::ptr::null()); + + let c_str: &CStr = unsafe { CStr::from_ptr(c_char_ptr) }; + + match c_str.to_str() { + Ok(rust_str) => Some(rust_str.to_string()), + Err(_) => None + } +} + /// A location in Rust code for integrating with debugging facilities defined in C. /// Use the [src_loc!] macro to crate an instance. pub struct SourceLocation { - pub file: CString, + pub file: &'static CStr, pub line: c_int, } /// Make a [SourceLocation] at the current spot. macro_rules! src_loc { () => { - // NOTE(alan): `CString::new` allocates so we might want to limit this to debug builds. - $crate::cruby::SourceLocation { - file: std::ffi::CString::new(file!()).unwrap(), // ASCII source file paths - line: line!().try_into().unwrap(), // not that many lines + { + // Nul-terminated string with static lifetime, make a CStr out of it safely. + let file: &'static str = concat!(file!(), '\0'); + $crate::cruby::SourceLocation { + file: unsafe { std::ffi::CStr::from_ptr(file.as_ptr().cast()) }, + line: line!().try_into().unwrap(), + } } }; } @@ -619,17 +674,16 @@ where Err(_) => { // Theoretically we can recover from some of these panics, // but it's too late if the unwind reaches here. - use std::{process, str}; let _ = catch_unwind(|| { // IO functions can panic too. eprintln!( "YJIT panicked while holding VM lock acquired at {}:{}. Aborting...", - str::from_utf8(loc.file.as_bytes()).unwrap_or("<not utf8>"), + loc.file.to_string_lossy(), line, ); }); - process::abort(); + std::process::abort(); } }; @@ -663,8 +717,10 @@ mod manual_defs { pub const RUBY_FIXNUM_MAX: isize = RUBY_LONG_MAX / 2; // From vm_callinfo.h - uses calculation that seems to confuse bindgen + pub const VM_CALL_ARGS_SIMPLE: u32 = 1 << VM_CALL_ARGS_SIMPLE_bit; pub const VM_CALL_ARGS_SPLAT: u32 = 1 << VM_CALL_ARGS_SPLAT_bit; pub const VM_CALL_ARGS_BLOCKARG: u32 = 1 << VM_CALL_ARGS_BLOCKARG_bit; + pub const VM_CALL_FORWARDING: u32 = 1 << VM_CALL_FORWARDING_bit; pub const VM_CALL_FCALL: u32 = 1 << VM_CALL_FCALL_bit; pub const VM_CALL_KWARG: u32 = 1 << VM_CALL_KWARG_bit; pub const VM_CALL_KW_SPLAT: u32 = 1 << VM_CALL_KW_SPLAT_bit; @@ -673,7 +729,7 @@ mod manual_defs { pub const VM_CALL_OPT_SEND : u32 = 1 << VM_CALL_OPT_SEND_bit; // From internal/struct.h - in anonymous enum, so we can't easily import it - pub const RSTRUCT_EMBED_LEN_MASK: usize = (RUBY_FL_USER2 | RUBY_FL_USER1) as usize; + pub const RSTRUCT_EMBED_LEN_MASK: usize = (RUBY_FL_USER7 | RUBY_FL_USER6 | RUBY_FL_USER5 | RUBY_FL_USER4 | RUBY_FL_USER3 |RUBY_FL_USER2 | RUBY_FL_USER1) as usize; // From iseq.h - via a different constant, which seems to confuse bindgen pub const ISEQ_TRANSLATED: usize = RUBY_FL_USER7 as usize; @@ -689,6 +745,9 @@ mod manual_defs { pub const RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR: i32 = 24; // struct RStruct, subfield "as.heap.ptr" pub const RUBY_OFFSET_RSTRUCT_AS_ARY: i32 = 16; // struct RStruct, subfield "as.ary" + pub const RUBY_OFFSET_RSTRING_AS_HEAP_PTR: i32 = 24; // struct RString, subfield "as.heap.ptr" + pub const RUBY_OFFSET_RSTRING_AS_ARY: i32 = 24; // struct RString, subfield "as.embed.ary" + // Constants from rb_control_frame_t vm_core.h pub const RUBY_OFFSET_CFP_PC: i32 = 0; pub const RUBY_OFFSET_CFP_SP: i32 = 8; @@ -696,9 +755,8 @@ mod manual_defs { pub const RUBY_OFFSET_CFP_SELF: i32 = 24; pub const RUBY_OFFSET_CFP_EP: i32 = 32; pub const RUBY_OFFSET_CFP_BLOCK_CODE: i32 = 40; - pub const RUBY_OFFSET_CFP_BP: i32 = 48; // field __bp__ - pub const RUBY_OFFSET_CFP_JIT_RETURN: i32 = 56; - pub const RUBY_SIZEOF_CONTROL_FRAME: usize = 64; + pub const RUBY_OFFSET_CFP_JIT_RETURN: i32 = 48; + pub const RUBY_SIZEOF_CONTROL_FRAME: usize = 56; // Constants from rb_execution_context_t vm_core.h pub const RUBY_OFFSET_EC_CFP: i32 = 16; @@ -714,3 +772,49 @@ mod manual_defs { pub const RUBY_OFFSET_ICE_VALUE: i32 = 8; } pub use manual_defs::*; + +/// Interned ID values for Ruby symbols and method names. +/// See [type@crate::cruby::ID] and usages outside of YJIT. +pub(crate) mod ids { + use std::sync::atomic::AtomicU64; + /// Globals to cache IDs on boot. Atomic to use with relaxed ordering + /// so reads can happen without `unsafe`. Synchronization done through + /// the VM lock. + macro_rules! def_ids { + ($(name: $ident:ident content: $str:literal)*) => { + $( + #[doc = concat!("[type@crate::cruby::ID] for `", stringify!($str), "`")] + pub static $ident: AtomicU64 = AtomicU64::new(0); + )* + + pub(crate) fn init() { + $( + let content = &$str; + let ptr: *const u8 = content.as_ptr(); + + // Lookup and cache each ID + $ident.store( + unsafe { $crate::cruby::rb_intern2(ptr.cast(), content.len() as _) }, + std::sync::atomic::Ordering::Relaxed + ); + )* + + } + } + } + + def_ids! { + name: NULL content: b"" + name: respond_to_missing content: b"respond_to_missing?" + name: to_ary content: b"to_ary" + name: eq content: b"==" + } +} + +/// Get an CRuby `ID` to an interned string, e.g. a particular method name. +macro_rules! ID { + ($id_name:ident) => { + $crate::cruby::ids::$id_name.load(std::sync::atomic::Ordering::Relaxed) + } +} +pub(crate) use ID; |