diff options
Diffstat (limited to 'yjit/src/cruby.rs')
-rw-r--r-- | yjit/src/cruby.rs | 509 |
1 files changed, 279 insertions, 230 deletions
diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs index 51ba9c1531..68c0304b06 100644 --- a/yjit/src/cruby.rs +++ b/yjit/src/cruby.rs @@ -83,8 +83,8 @@ #![allow(non_upper_case_globals)] use std::convert::From; -use std::ffi::CString; -use std::os::raw::{c_char, c_int, c_long, c_uint}; +use std::ffi::{CString, CStr}; +use std::os::raw::{c_char, c_int, c_uint}; use std::panic::{catch_unwind, UnwindSafe}; // We check that we can do this with the configure script and a couple of @@ -96,6 +96,7 @@ pub type size_t = u64; pub type RedefinitionFlag = u32; #[allow(dead_code)] +#[allow(clippy::all)] mod autogened { use super::*; // Textually include output from rust-bindgen as suggested by its user guide. @@ -106,157 +107,20 @@ pub use autogened::*; // TODO: For #defines that affect memory layout, we need to check for them // on build and fail if they're wrong. e.g. USE_FLONUM *must* be true. -// TODO: -// Temporary, these external bindings will likely be auto-generated -// and textually included in this file +// These are functions we expose from C files, not in any header. +// Parsing it would result in a lot of duplicate definitions. +// Use bindgen for functions that are defined in headers or in yjit.c. #[cfg_attr(test, allow(unused))] // We don't link against C code when testing extern "C" { - #[link_name = "rb_insn_name"] - pub fn raw_insn_name(insn: VALUE) -> *const c_char; - - #[link_name = "rb_insn_len"] - pub fn raw_insn_len(v: VALUE) -> c_int; - - #[link_name = "rb_yarv_class_of"] - pub fn CLASS_OF(v: VALUE) -> VALUE; - - #[link_name = "rb_get_ec_cfp"] - pub fn get_ec_cfp(ec: EcPtr) -> CfpPtr; - - #[link_name = "rb_get_cfp_pc"] - pub fn get_cfp_pc(cfp: CfpPtr) -> *mut VALUE; - - #[link_name = "rb_get_cfp_sp"] - pub fn get_cfp_sp(cfp: CfpPtr) -> *mut VALUE; - - #[link_name = "rb_get_cfp_self"] - pub fn get_cfp_self(cfp: CfpPtr) -> VALUE; - - #[link_name = "rb_get_cfp_ep"] - pub fn get_cfp_ep(cfp: CfpPtr) -> *mut VALUE; - - #[link_name = "rb_get_cme_def_type"] - pub fn get_cme_def_type(cme: *const rb_callable_method_entry_t) -> rb_method_type_t; - - #[link_name = "rb_get_cme_def_body_attr_id"] - pub fn get_cme_def_body_attr_id(cme: *const rb_callable_method_entry_t) -> ID; - - #[link_name = "rb_get_cme_def_body_optimized_type"] - pub fn get_cme_def_body_optimized_type( - cme: *const rb_callable_method_entry_t, - ) -> method_optimized_type; - - #[link_name = "rb_get_cme_def_body_optimized_index"] - pub fn get_cme_def_body_optimized_index(cme: *const rb_callable_method_entry_t) -> c_uint; - - #[link_name = "rb_get_cme_def_body_cfunc"] - pub fn get_cme_def_body_cfunc(cme: *const rb_callable_method_entry_t) - -> *mut rb_method_cfunc_t; - - #[link_name = "rb_get_def_method_serial"] - /// While this returns a uintptr_t in C, we always use it as a Rust u64 - pub fn get_def_method_serial(def: *const rb_method_definition_t) -> u64; - - #[link_name = "rb_get_def_original_id"] - pub fn get_def_original_id(def: *const rb_method_definition_t) -> ID; - - #[link_name = "rb_get_mct_argc"] - pub fn get_mct_argc(mct: *const rb_method_cfunc_t) -> c_int; - - #[link_name = "rb_get_mct_func"] - pub fn get_mct_func(mct: *const rb_method_cfunc_t) -> *const u8; - - #[link_name = "rb_get_def_iseq_ptr"] - pub fn get_def_iseq_ptr(def: *const rb_method_definition_t) -> IseqPtr; - - #[link_name = "rb_iseq_encoded_size"] - pub fn get_iseq_encoded_size(iseq: IseqPtr) -> c_uint; - - #[link_name = "rb_get_iseq_body_local_iseq"] - pub fn get_iseq_body_local_iseq(iseq: IseqPtr) -> IseqPtr; - - #[link_name = "rb_get_iseq_body_iseq_encoded"] - pub fn get_iseq_body_iseq_encoded(iseq: IseqPtr) -> *mut VALUE; - - #[link_name = "rb_get_iseq_body_stack_max"] - pub fn get_iseq_body_stack_max(iseq: IseqPtr) -> c_uint; - - #[link_name = "rb_get_iseq_flags_has_opt"] - pub fn get_iseq_flags_has_opt(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_flags_has_kw"] - pub fn get_iseq_flags_has_kw(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_flags_has_rest"] - pub fn get_iseq_flags_has_rest(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_flags_has_post"] - pub fn get_iseq_flags_has_post(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_flags_has_kwrest"] - pub fn get_iseq_flags_has_kwrest(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_flags_has_block"] - pub fn get_iseq_flags_has_block(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_flags_has_accepts_no_kwarg"] - pub fn get_iseq_flags_has_accepts_no_kwarg(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_body_local_table_size"] - pub fn get_iseq_body_local_table_size(iseq: IseqPtr) -> c_uint; - - #[link_name = "rb_get_iseq_body_param_keyword"] - pub fn get_iseq_body_param_keyword(iseq: IseqPtr) -> *const rb_seq_param_keyword_struct; - - #[link_name = "rb_get_iseq_body_param_size"] - pub fn get_iseq_body_param_size(iseq: IseqPtr) -> c_uint; - - #[link_name = "rb_get_iseq_body_param_lead_num"] - pub fn get_iseq_body_param_lead_num(iseq: IseqPtr) -> c_int; - - #[link_name = "rb_get_iseq_body_param_opt_num"] - pub fn get_iseq_body_param_opt_num(iseq: IseqPtr) -> c_int; - - #[link_name = "rb_get_iseq_body_param_opt_table"] - pub fn get_iseq_body_param_opt_table(iseq: IseqPtr) -> *const VALUE; - - #[link_name = "rb_get_cikw_keyword_len"] - pub fn get_cikw_keyword_len(cikw: *const rb_callinfo_kwarg) -> c_int; - - #[link_name = "rb_get_cikw_keywords_idx"] - pub fn get_cikw_keywords_idx(cikw: *const rb_callinfo_kwarg, idx: c_int) -> VALUE; - - #[link_name = "rb_get_call_data_ci"] - pub fn get_call_data_ci(cd: *const rb_call_data) -> *const rb_callinfo; - - #[link_name = "rb_yarv_str_eql_internal"] - pub fn rb_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE; - - #[link_name = "rb_yarv_ary_entry_internal"] - pub fn rb_ary_entry_internal(ary: VALUE, offset: c_long) -> VALUE; - - #[link_name = "rb_FL_TEST"] - pub fn FL_TEST(obj: VALUE, flags: VALUE) -> VALUE; - - #[link_name = "rb_FL_TEST_RAW"] - pub fn FL_TEST_RAW(obj: VALUE, flags: VALUE) -> VALUE; - - #[link_name = "rb_RB_TYPE_P"] - pub fn RB_TYPE_P(obj: VALUE, t: ruby_value_type) -> bool; - - #[link_name = "rb_BASIC_OP_UNREDEFINED_P"] - pub fn BASIC_OP_UNREDEFINED_P(bop: ruby_basic_operators, klass: RedefinitionFlag) -> bool; - - #[link_name = "rb_RSTRUCT_LEN"] - pub fn RSTRUCT_LEN(st: VALUE) -> c_long; - - #[link_name = "rb_RSTRUCT_SET"] - pub fn RSTRUCT_SET(st: VALUE, k: c_int, v: VALUE); - - // Ruby only defines these in vm_insnhelper.c, not in any header. - // Parsing it would result in a lot of duplicate definitions. - pub fn rb_vm_opt_mod(recv: VALUE, obj: VALUE) -> VALUE; + pub fn rb_check_overloaded_cme( + me: *const rb_callable_method_entry_t, + ci: *const rb_callinfo, + ) -> *const rb_callable_method_entry_t; + pub fn rb_hash_empty_p(hash: VALUE) -> VALUE; + pub fn rb_str_setbyte(str: VALUE, index: VALUE, value: VALUE) -> VALUE; pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE; + pub fn rb_vm_concat_array(ary1: VALUE, ary2st: VALUE) -> VALUE; + pub fn rb_vm_concat_to_array(ary1: VALUE, ary2st: VALUE) -> VALUE; pub fn rb_vm_defined( ec: EcPtr, reg_cfp: CfpPtr, @@ -264,7 +128,7 @@ extern "C" { obj: VALUE, v: VALUE, ) -> bool; - pub fn rb_vm_set_ivar_idx(obj: VALUE, idx: u32, val: VALUE) -> VALUE; + pub fn rb_vm_set_ivar_id(obj: VALUE, idx: u32, val: VALUE) -> VALUE; pub fn rb_vm_setinstancevariable(iseq: IseqPtr, obj: VALUE, id: ID, val: VALUE, ic: IVC); pub fn rb_aliased_callable_method_entry( me: *const rb_callable_method_entry_t, @@ -278,32 +142,72 @@ extern "C" { ic: ICVARC, ) -> VALUE; pub fn rb_vm_ic_hit_p(ic: IC, reg_ep: *const VALUE) -> bool; - - #[link_name = "rb_vm_ci_argc"] - pub fn vm_ci_argc(ci: *const rb_callinfo) -> c_int; - - #[link_name = "rb_vm_ci_mid"] - pub fn vm_ci_mid(ci: *const rb_callinfo) -> ID; - - #[link_name = "rb_vm_ci_flag"] - pub fn vm_ci_flag(ci: *const rb_callinfo) -> c_uint; - - #[link_name = "rb_vm_ci_kwarg"] - pub fn vm_ci_kwarg(ci: *const rb_callinfo) -> *const rb_callinfo_kwarg; - - #[link_name = "rb_METHOD_ENTRY_VISI"] - pub fn METHOD_ENTRY_VISI(me: *const rb_callable_method_entry_t) -> rb_method_visibility_t; - - pub fn rb_str_bytesize(str: VALUE) -> VALUE; - - #[link_name = "rb_RCLASS_ORIGIN"] - pub fn RCLASS_ORIGIN(v: VALUE) -> VALUE; + pub fn rb_vm_stack_canary() -> VALUE; + pub fn rb_vm_push_cfunc_frame(cme: *const rb_callable_method_entry_t, recv_idx: c_int); } +// Renames +pub use rb_insn_name as raw_insn_name; +pub use rb_get_ec_cfp as get_ec_cfp; +pub use rb_get_cfp_iseq as get_cfp_iseq; +pub use rb_get_cfp_pc as get_cfp_pc; +pub use rb_get_cfp_sp as get_cfp_sp; +pub use rb_get_cfp_self as get_cfp_self; +pub use rb_get_cfp_ep as get_cfp_ep; +pub use rb_get_cfp_ep_level as get_cfp_ep_level; +pub use rb_vm_base_ptr as get_cfp_bp; +pub use rb_get_cme_def_type as get_cme_def_type; +pub use rb_get_cme_def_body_attr_id as get_cme_def_body_attr_id; +pub use rb_get_cme_def_body_optimized_type as get_cme_def_body_optimized_type; +pub use rb_get_cme_def_body_optimized_index as get_cme_def_body_optimized_index; +pub use rb_get_cme_def_body_cfunc as get_cme_def_body_cfunc; +pub use rb_get_def_method_serial as get_def_method_serial; +pub use rb_get_def_original_id as get_def_original_id; +pub use rb_get_mct_argc as get_mct_argc; +pub use rb_get_mct_func as get_mct_func; +pub use rb_get_def_iseq_ptr as get_def_iseq_ptr; +pub use rb_iseq_encoded_size as get_iseq_encoded_size; +pub use rb_get_iseq_body_local_iseq as get_iseq_body_local_iseq; +pub use rb_get_iseq_body_iseq_encoded as get_iseq_body_iseq_encoded; +pub use rb_get_iseq_body_stack_max as get_iseq_body_stack_max; +pub use rb_get_iseq_body_type as get_iseq_body_type; +pub use rb_get_iseq_flags_has_lead as get_iseq_flags_has_lead; +pub use rb_get_iseq_flags_has_opt as get_iseq_flags_has_opt; +pub use rb_get_iseq_flags_has_kw as get_iseq_flags_has_kw; +pub use rb_get_iseq_flags_has_rest as get_iseq_flags_has_rest; +pub use rb_get_iseq_flags_has_post as get_iseq_flags_has_post; +pub use rb_get_iseq_flags_has_kwrest as get_iseq_flags_has_kwrest; +pub use rb_get_iseq_flags_has_block as get_iseq_flags_has_block; +pub use rb_get_iseq_flags_ambiguous_param0 as get_iseq_flags_ambiguous_param0; +pub use rb_get_iseq_flags_accepts_no_kwarg as get_iseq_flags_accepts_no_kwarg; +pub use rb_get_iseq_body_local_table_size as get_iseq_body_local_table_size; +pub use rb_get_iseq_body_param_keyword as get_iseq_body_param_keyword; +pub use rb_get_iseq_body_param_size as get_iseq_body_param_size; +pub use rb_get_iseq_body_param_lead_num as get_iseq_body_param_lead_num; +pub use rb_get_iseq_body_param_opt_num as get_iseq_body_param_opt_num; +pub use rb_get_iseq_body_param_opt_table as get_iseq_body_param_opt_table; +pub use rb_get_cikw_keyword_len as get_cikw_keyword_len; +pub use rb_get_cikw_keywords_idx as get_cikw_keywords_idx; +pub use rb_get_call_data_ci as get_call_data_ci; +pub use rb_yarv_str_eql_internal as rb_str_eql_internal; +pub use rb_yarv_ary_entry_internal as rb_ary_entry_internal; +pub use rb_yjit_fix_div_fix as rb_fix_div_fix; +pub use rb_yjit_fix_mod_fix as rb_fix_mod_fix; +pub use rb_FL_TEST as FL_TEST; +pub use rb_FL_TEST_RAW as FL_TEST_RAW; +pub use rb_RB_TYPE_P as RB_TYPE_P; +pub use rb_BASIC_OP_UNREDEFINED_P as BASIC_OP_UNREDEFINED_P; +pub use rb_RSTRUCT_LEN as RSTRUCT_LEN; +pub use rb_RSTRUCT_SET as RSTRUCT_SET; +pub use rb_vm_ci_argc as vm_ci_argc; +pub use rb_vm_ci_mid as vm_ci_mid; +pub use rb_vm_ci_flag as vm_ci_flag; +pub use rb_vm_ci_kwarg as vm_ci_kwarg; +pub use rb_METHOD_ENTRY_VISI as METHOD_ENTRY_VISI; +pub use rb_RCLASS_ORIGIN as RCLASS_ORIGIN; + /// Helper so we can get a Rust string for insn_name() pub fn insn_name(opcode: usize) -> String { - use std::ffi::CStr; - unsafe { // Look up Ruby's NULL-terminated insn name string let op_name = raw_insn_name(VALUE(opcode)); @@ -323,7 +227,7 @@ pub fn insn_len(opcode: usize) -> u32 { #[cfg(not(test))] unsafe { - raw_insn_len(VALUE(opcode)).try_into().unwrap() + rb_insn_len(VALUE(opcode)).try_into().unwrap() } } @@ -346,6 +250,30 @@ pub struct VALUE(pub usize); /// Pointer to an ISEQ pub type IseqPtr = *const rb_iseq_t; +// Given an ISEQ pointer, convert PC to insn_idx +pub fn iseq_pc_to_insn_idx(iseq: IseqPtr, pc: *mut VALUE) -> Option<u16> { + let pc_zero = unsafe { rb_iseq_pc_at_idx(iseq, 0) }; + unsafe { pc.offset_from(pc_zero) }.try_into().ok() +} + +/// Given an ISEQ pointer and an instruction index, return an opcode. +pub fn iseq_opcode_at_idx(iseq: IseqPtr, insn_idx: u32) -> u32 { + let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) }; + unsafe { rb_iseq_opcode_at_pc(iseq, pc) as u32 } +} + +/// Return a poison value to be set above the stack top to verify leafness. +#[cfg(not(test))] +pub fn vm_stack_canary() -> u64 { + unsafe { rb_vm_stack_canary() }.as_u64() +} + +/// Avoid linking the C function in `cargo test` +#[cfg(test)] +pub fn vm_stack_canary() -> u64 { + 0 +} + /// Opaque execution-context type from vm_core.h #[repr(C)] pub struct rb_execution_context_struct { @@ -373,13 +301,6 @@ pub struct rb_method_cfunc_t { _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, } -/// Opaque FILE type from the C standard library -#[repr(C)] -pub struct FILE { - _data: [u8; 0], - _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, -} - /// Opaque call-cache type from vm_callinfo.h #[repr(C)] pub struct rb_callcache { @@ -387,13 +308,6 @@ pub struct rb_callcache { _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, } -/// Opaque call-info type from vm_callinfo.h -#[repr(C)] -pub struct rb_callinfo_kwarg { - _data: [u8; 0], - _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, -} - /// Opaque control_frame (CFP) struct from vm_core.h #[repr(C)] pub struct rb_control_frame_struct { @@ -427,7 +341,8 @@ impl VALUE { /// Return true if the number is an immediate integer, flonum or static symbol fn immediate_p(self) -> bool { let VALUE(cval) = self; - (cval & 7) != 0 + let mask = RUBY_IMMEDIATE_MASK as usize; + (cval & mask) != 0 } /// Return true if the value is a Ruby immediate integer, flonum, static symbol, nil or false @@ -435,22 +350,50 @@ impl VALUE { self.immediate_p() || !self.test() } + /// Return true if the value is a heap object + pub fn heap_object_p(self) -> bool { + !self.special_const_p() + } + /// Return true if the value is a Ruby Fixnum (immediate-size integer) pub fn fixnum_p(self) -> bool { let VALUE(cval) = self; - (cval & 1) == 1 + let flag = RUBY_FIXNUM_FLAG as usize; + (cval & flag) == flag } /// Return true if the value is an immediate Ruby floating-point number (flonum) pub fn flonum_p(self) -> bool { let VALUE(cval) = self; - (cval & 3) == 2 + let mask = RUBY_FLONUM_MASK as usize; + let flag = RUBY_FLONUM_FLAG as usize; + (cval & mask) == flag + } + + /// Return true if the value is a Ruby symbol (RB_SYMBOL_P) + pub fn symbol_p(self) -> bool { + self.static_sym_p() || self.dynamic_sym_p() } - /// Return true for a static (non-heap) Ruby symbol + /// Return true for a static (non-heap) Ruby symbol (RB_STATIC_SYM_P) pub fn static_sym_p(self) -> bool { let VALUE(cval) = self; - (cval & 0xff) == RUBY_SYMBOL_FLAG + let flag = RUBY_SYMBOL_FLAG as usize; + (cval & 0xff) == flag + } + + /// Return true for a dynamic Ruby symbol (RB_DYNAMIC_SYM_P) + fn dynamic_sym_p(self) -> bool { + return if self.special_const_p() { + false + } else { + self.builtin_type() == RUBY_T_SYMBOL + } + } + + /// Returns true if the value is T_HASH + pub fn hash_p(self) -> bool { + !self.special_const_p() && self.builtin_type() == RUBY_T_HASH } /// Returns true or false depending on whether the value is nil @@ -458,18 +401,62 @@ impl VALUE { self == Qnil } + pub fn string_p(self) -> bool { + self.class_of() == unsafe { rb_cString } + } + /// Read the flags bits from the RBasic object, then return a Ruby type enum (e.g. RUBY_T_ARRAY) pub fn builtin_type(self) -> ruby_value_type { + (self.builtin_flags() & (RUBY_T_MASK as usize)) as ruby_value_type + } + + pub fn builtin_flags(self) -> usize { assert!(!self.special_const_p()); let VALUE(cval) = self; let rbasic_ptr = cval as *const RBasic; let flags_bits: usize = unsafe { (*rbasic_ptr).flags }.as_usize(); - (flags_bits & (RUBY_T_MASK as usize)) as ruby_value_type + return flags_bits; } pub fn class_of(self) -> VALUE { - unsafe { CLASS_OF(self) } + if !self.special_const_p() { + let builtin_type = self.builtin_type(); + assert_ne!(builtin_type, RUBY_T_NONE, "YJIT should only see live objects"); + assert_ne!(builtin_type, RUBY_T_MOVED, "YJIT should only see live objects"); + } + + unsafe { rb_yarv_class_of(self) } + } + + pub fn is_frozen(self) -> bool { + unsafe { rb_obj_frozen_p(self) != VALUE(0) } + } + + pub fn shape_too_complex(self) -> bool { + unsafe { rb_shape_obj_too_complex(self) } + } + + pub fn shape_id_of(self) -> u32 { + unsafe { rb_shape_get_shape_id(self) } + } + + pub fn shape_of(self) -> *mut rb_shape { + unsafe { + let shape = rb_shape_get_shape_by_id(self.shape_id_of()); + + if shape.is_null() { + panic!("Shape should not be null"); + } else { + shape + } + } + } + + pub fn embedded_p(self) -> bool { + unsafe { + FL_TEST_RAW(self, VALUE(ROBJECT_EMBED as usize)) != VALUE(0) + } } pub fn as_isize(self) -> isize { @@ -498,7 +485,7 @@ impl VALUE { pub fn as_usize(self) -> usize { let VALUE(us) = self; - us as usize + us } pub fn as_ptr<T>(self) -> *const T { @@ -511,7 +498,7 @@ impl VALUE { us as *mut T } - /// For working with opague pointers and encoding null check. + /// For working with opaque pointers and encoding null check. /// Similar to [std::ptr::NonNull], but for `*const T`. `NonNull<T>` /// is for `*mut T` while our C functions are setup to use `*const T`. /// Casting from `NonNull<T>` to `*const T` is too noisy. @@ -591,38 +578,59 @@ impl From<VALUE> for i32 { fn from(value: VALUE) -> Self { let VALUE(uimm) = value; assert!(uimm <= (i32::MAX as usize)); - uimm as i32 + uimm.try_into().unwrap() + } +} + +impl From<VALUE> for u16 { + fn from(value: VALUE) -> Self { + let VALUE(uimm) = value; + uimm.try_into().unwrap() } } /// Produce a Ruby string from a Rust string slice -#[cfg(feature = "asm_comments")] +#[cfg(feature = "disasm")] pub fn rust_str_to_ruby(str: &str) -> VALUE { - unsafe { rb_utf8_str_new(str.as_ptr() as *const i8, str.len() as i64) } + unsafe { rb_utf8_str_new(str.as_ptr() as *const _, str.len() as i64) } } /// Produce a Ruby symbol from a Rust string slice pub fn rust_str_to_sym(str: &str) -> VALUE { let c_str = CString::new(str).unwrap(); let c_ptr: *const c_char = c_str.as_ptr(); - unsafe { rb_id2sym(rb_intern(c_ptr)) } } +/// Produce an owned Rust String from a C char pointer +pub fn cstr_to_rust_string(c_char_ptr: *const c_char) -> Option<String> { + assert!(c_char_ptr != std::ptr::null()); + + let c_str: &CStr = unsafe { CStr::from_ptr(c_char_ptr) }; + + match c_str.to_str() { + Ok(rust_str) => Some(rust_str.to_string()), + Err(_) => None + } +} + /// A location in Rust code for integrating with debugging facilities defined in C. /// Use the [src_loc!] macro to crate an instance. pub struct SourceLocation { - pub file: CString, + pub file: &'static CStr, pub line: c_int, } /// Make a [SourceLocation] at the current spot. macro_rules! src_loc { () => { - // NOTE(alan): `CString::new` allocates so we might want to limit this to debug builds. - $crate::cruby::SourceLocation { - file: std::ffi::CString::new(file!()).unwrap(), // ASCII source file paths - line: line!().try_into().unwrap(), // not that many lines + { + // Nul-terminated string with static lifetime, make a CStr out of it safely. + let file: &'static str = concat!(file!(), '\0'); + $crate::cruby::SourceLocation { + file: unsafe { std::ffi::CStr::from_ptr(file.as_ptr().cast()) }, + line: line!().try_into().unwrap(), + } } }; } @@ -660,17 +668,16 @@ where Err(_) => { // Theoretically we can recover from some of these panics, // but it's too late if the unwind reaches here. - use std::{process, str}; let _ = catch_unwind(|| { // IO functions can panic too. eprintln!( "YJIT panicked while holding VM lock acquired at {}:{}. Aborting...", - str::from_utf8(loc.file.as_bytes()).unwrap_or("<not utf8>"), + loc.file.to_string_lossy(), line, ); }); - process::abort(); + std::process::abort(); } }; @@ -681,13 +688,13 @@ where // Non-idiomatic capitalization for consistency with CRuby code #[allow(non_upper_case_globals)] -pub const Qfalse: VALUE = VALUE(0); +pub const Qfalse: VALUE = VALUE(RUBY_Qfalse as usize); #[allow(non_upper_case_globals)] -pub const Qnil: VALUE = VALUE(8); +pub const Qnil: VALUE = VALUE(RUBY_Qnil as usize); #[allow(non_upper_case_globals)] -pub const Qtrue: VALUE = VALUE(20); +pub const Qtrue: VALUE = VALUE(RUBY_Qtrue as usize); #[allow(non_upper_case_globals)] -pub const Qundef: VALUE = VALUE(52); +pub const Qundef: VALUE = VALUE(RUBY_Qundef as usize); #[allow(unused)] mod manual_defs { @@ -695,33 +702,27 @@ mod manual_defs { pub const SIZEOF_VALUE: usize = 8; pub const SIZEOF_VALUE_I32: i32 = SIZEOF_VALUE as i32; + pub const VALUE_BITS: u8 = 8 * SIZEOF_VALUE as u8; pub const RUBY_LONG_MIN: isize = std::os::raw::c_long::MIN as isize; pub const RUBY_LONG_MAX: isize = std::os::raw::c_long::MAX as isize; pub const RUBY_FIXNUM_MIN: isize = RUBY_LONG_MIN / 2; pub const RUBY_FIXNUM_MAX: isize = RUBY_LONG_MAX / 2; - pub const RUBY_FIXNUM_FLAG: usize = 0x1; - - // All these are defined in include/ruby/internal/special_consts.h, - // in the same enum as RUBY_Qfalse, etc. - // Do we want to switch to using Ruby's definition of Qnil, Qfalse, etc? - pub const RUBY_SYMBOL_FLAG: usize = 0x0c; - pub const RUBY_FLONUM_FLAG: usize = 0x2; - pub const RUBY_FLONUM_MASK: usize = 0x3; - pub const RUBY_SPECIAL_SHIFT: usize = 8; - pub const RUBY_IMMEDIATE_MASK: usize = 0x7; // From vm_callinfo.h - uses calculation that seems to confuse bindgen + pub const VM_CALL_ARGS_SIMPLE: u32 = 1 << VM_CALL_ARGS_SIMPLE_bit; pub const VM_CALL_ARGS_SPLAT: u32 = 1 << VM_CALL_ARGS_SPLAT_bit; pub const VM_CALL_ARGS_BLOCKARG: u32 = 1 << VM_CALL_ARGS_BLOCKARG_bit; pub const VM_CALL_FCALL: u32 = 1 << VM_CALL_FCALL_bit; pub const VM_CALL_KWARG: u32 = 1 << VM_CALL_KWARG_bit; pub const VM_CALL_KW_SPLAT: u32 = 1 << VM_CALL_KW_SPLAT_bit; pub const VM_CALL_TAILCALL: u32 = 1 << VM_CALL_TAILCALL_bit; + pub const VM_CALL_ZSUPER : u32 = 1 << VM_CALL_ZSUPER_bit; + pub const VM_CALL_OPT_SEND : u32 = 1 << VM_CALL_OPT_SEND_bit; // From internal/struct.h - in anonymous enum, so we can't easily import it - pub const RSTRUCT_EMBED_LEN_MASK: usize = (RUBY_FL_USER2 | RUBY_FL_USER1) as usize; + pub const RSTRUCT_EMBED_LEN_MASK: usize = (RUBY_FL_USER7 | RUBY_FL_USER6 | RUBY_FL_USER5 | RUBY_FL_USER4 | RUBY_FL_USER3 |RUBY_FL_USER2 | RUBY_FL_USER1) as usize; // From iseq.h - via a different constant, which seems to confuse bindgen pub const ISEQ_TRANSLATED: usize = RUBY_FL_USER7 as usize; @@ -737,9 +738,8 @@ mod manual_defs { pub const RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR: i32 = 24; // struct RStruct, subfield "as.heap.ptr" pub const RUBY_OFFSET_RSTRUCT_AS_ARY: i32 = 16; // struct RStruct, subfield "as.ary" - pub const RUBY_OFFSET_ROBJECT_AS_ARY: i32 = 16; // struct RObject, subfield "as.ary" - pub const RUBY_OFFSET_ROBJECT_AS_HEAP_NUMIV: i32 = 16; // struct RObject, subfield "as.heap.numiv" - pub const RUBY_OFFSET_ROBJECT_AS_HEAP_IVPTR: i32 = 24; // struct RObject, subfield "as.heap.ivptr" + pub const RUBY_OFFSET_RSTRING_AS_HEAP_PTR: i32 = 24; // struct RString, subfield "as.heap.ptr" + pub const RUBY_OFFSET_RSTRING_AS_ARY: i32 = 24; // struct RString, subfield "as.embed.ary" // Constants from rb_control_frame_t vm_core.h pub const RUBY_OFFSET_CFP_PC: i32 = 0; @@ -748,9 +748,8 @@ mod manual_defs { pub const RUBY_OFFSET_CFP_SELF: i32 = 24; pub const RUBY_OFFSET_CFP_EP: i32 = 32; pub const RUBY_OFFSET_CFP_BLOCK_CODE: i32 = 40; - pub const RUBY_OFFSET_CFP_BP: i32 = 48; // field __bp__ - pub const RUBY_OFFSET_CFP_JIT_RETURN: i32 = 56; - pub const RUBY_SIZEOF_CONTROL_FRAME: usize = 64; + pub const RUBY_OFFSET_CFP_JIT_RETURN: i32 = 48; + pub const RUBY_SIZEOF_CONTROL_FRAME: usize = 56; // Constants from rb_execution_context_t vm_core.h pub const RUBY_OFFSET_EC_CFP: i32 = 16; @@ -766,3 +765,53 @@ mod manual_defs { pub const RUBY_OFFSET_ICE_VALUE: i32 = 8; } pub use manual_defs::*; + +/// Interned ID values for Ruby symbols and method names. +/// See [crate::cruby::ID] and usages outside of YJIT. +pub(crate) mod ids { + use std::sync::atomic::AtomicU64; + /// Globals to cache IDs on boot. Atomic to use with relaxed ordering + /// so reads can happen without `unsafe`. Initialization is done + /// single-threaded and release-acquire on [crate::yjit::YJIT_ENABLED] + /// makes sure we read the cached values after initialization is done. + macro_rules! def_ids { + ($(name: $ident:ident content: $str:literal)*) => { + $( + #[doc = concat!("[crate::cruby::ID] for `", stringify!($str), "`")] + pub static $ident: AtomicU64 = AtomicU64::new(0); + )* + + pub(crate) fn init() { + $( + let content = &$str; + let ptr: *const u8 = content.as_ptr(); + + // Lookup and cache each ID + $ident.store( + unsafe { $crate::cruby::rb_intern2(ptr.cast(), content.len() as _) }, + std::sync::atomic::Ordering::Relaxed + ); + )* + + } + } + } + + def_ids! { + name: NULL content: b"" + name: min content: b"min" + name: max content: b"max" + name: hash content: b"hash" + name: respond_to_missing content: b"respond_to_missing?" + name: to_ary content: b"to_ary" + name: eq content: b"==" + } +} + +/// Get an CRuby `ID` to an interned string, e.g. a particular method name. +macro_rules! ID { + ($id_name:ident) => { + $crate::cruby::ids::$id_name.load(std::sync::atomic::Ordering::Relaxed) + } +} +pub(crate) use ID; |