summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMax Leopold <max.leopold2111@gmail.com>2025-11-03 21:46:26 +0100
committerGitHub <noreply@github.com>2025-11-03 15:46:26 -0500
commit4001e81a8eb04ac1b7653b05762bcdcb364760e1 (patch)
tree882219f03ade6b7fa7e12974849af09a1c45d24e
parent8117600232161bdea403481ad2b9b66d36856c1a (diff)
ZJIT: Inline String#bytesize (#15033)
Inline the `String#bytesize` function and remove the C call.
-rw-r--r--test/ruby/test_zjit.rb10
-rw-r--r--zjit/src/codegen.rs9
-rw-r--r--zjit/src/cruby.rs1
-rw-r--r--zjit/src/cruby_methods.rs23
-rw-r--r--zjit/src/hir.rs11
-rw-r--r--zjit/src/hir/opt_tests.rs33
-rw-r--r--zjit/src/stats.rs2
7 files changed, 57 insertions, 32 deletions
diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb
index f12ac19af5..de2d1e6152 100644
--- a/test/ruby/test_zjit.rb
+++ b/test/ruby/test_zjit.rb
@@ -2568,6 +2568,16 @@ class TestZJIT < Test::Unit::TestCase
}, call_threshold: 2
end
+ def test_string_bytesize_multibyte
+ assert_compiles '4', %q{
+ def test(s)
+ s.bytesize
+ end
+
+ test("💎")
+ }, call_threshold: 2
+ end
+
def test_nil_value_nil_opt_with_guard
assert_compiles 'true', %q{
def test(val) = val.nil?
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index 72d111ac8a..7cd677bde3 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -403,6 +403,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
&Insn::IsBitEqual { left, right } => gen_is_bit_equal(asm, opnd!(left), opnd!(right)),
&Insn::IsBitNotEqual { left, right } => gen_is_bit_not_equal(asm, opnd!(left), opnd!(right)),
&Insn::BoxBool { val } => gen_box_bool(asm, opnd!(val)),
+ &Insn::BoxFixnum { val, state } => gen_box_fixnum(jit, asm, opnd!(val), &function.frame_state(state)),
Insn::Test { val } => gen_test(asm, opnd!(val)),
Insn::GuardType { val, guard_type, state } => gen_guard_type(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)),
Insn::GuardTypeNot { val, guard_type, state } => gen_guard_type_not(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)),
@@ -1595,6 +1596,14 @@ fn gen_box_bool(asm: &mut Assembler, val: lir::Opnd) -> lir::Opnd {
asm.csel_nz(Opnd::Value(Qtrue), Opnd::Value(Qfalse))
}
+fn gen_box_fixnum(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, state: &FrameState) -> lir::Opnd {
+ // Load the value, then test for overflow and tag it
+ let val = asm.load(val);
+ let shifted = asm.lshift(val, Opnd::UImm(1));
+ asm.jo(side_exit(jit, state, BoxFixnumOverflow));
+ asm.or(shifted, Opnd::UImm(RUBY_FIXNUM_FLAG as u64))
+}
+
fn gen_anytostring(asm: &mut Assembler, val: lir::Opnd, str: lir::Opnd, state: &FrameState) -> lir::Opnd {
gen_prepare_leaf_call_with_gc(asm, state);
diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs
index 89488fd255..631acbd863 100644
--- a/zjit/src/cruby.rs
+++ b/zjit/src/cruby.rs
@@ -1376,6 +1376,7 @@ pub(crate) mod ids {
name: freeze
name: minusat content: b"-@"
name: aref content: b"[]"
+ name: len
name: _as_heap
}
diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs
index 0af3be1819..37d75f4597 100644
--- a/zjit/src/cruby_methods.rs
+++ b/zjit/src/cruby_methods.rs
@@ -195,7 +195,7 @@ pub fn init() -> Annotations {
annotate!(rb_mKernel, "itself", inline_kernel_itself);
annotate!(rb_mKernel, "block_given?", inline_kernel_block_given_p);
annotate!(rb_mKernel, "===", inline_eqq);
- annotate!(rb_cString, "bytesize", types::Fixnum, no_gc, leaf, elidable);
+ annotate!(rb_cString, "bytesize", inline_string_bytesize);
annotate!(rb_cString, "size", types::Fixnum, no_gc, leaf, elidable);
annotate!(rb_cString, "length", types::Fixnum, no_gc, leaf, elidable);
annotate!(rb_cString, "getbyte", inline_string_getbyte);
@@ -305,6 +305,27 @@ fn inline_hash_aref(fun: &mut hir::Function, block: hir::BlockId, recv: hir::Ins
None
}
+
+fn inline_string_bytesize(fun: &mut hir::Function, block: hir::BlockId, recv: hir::InsnId, args: &[hir::InsnId], state: hir::InsnId) -> Option<hir::InsnId> {
+ if args.is_empty() && fun.likely_a(recv, types::String, state) {
+ let recv = fun.coerce_to(block, recv, types::String, state);
+ let len = fun.push_insn(block, hir::Insn::LoadField {
+ recv,
+ id: ID!(len),
+ offset: RUBY_OFFSET_RSTRING_LEN as i32,
+ return_type: types::CInt64,
+ });
+
+ let result = fun.push_insn(block, hir::Insn::BoxFixnum {
+ val: len,
+ state,
+ });
+
+ return Some(result);
+ }
+ None
+}
+
fn inline_string_getbyte(fun: &mut hir::Function, block: hir::BlockId, recv: hir::InsnId, args: &[hir::InsnId], state: hir::InsnId) -> Option<hir::InsnId> {
let &[index] = args else { return None; };
if fun.likely_a(index, types::Fixnum, state) {
diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs
index ce164f37d6..136f7b452f 100644
--- a/zjit/src/hir.rs
+++ b/zjit/src/hir.rs
@@ -478,6 +478,7 @@ pub enum SideExitReason {
BlockParamProxyNotIseqOrIfunc,
StackOverflow,
FixnumModByZero,
+ BoxFixnumOverflow,
}
#[derive(Debug, Clone, Copy)]
@@ -655,6 +656,8 @@ pub enum Insn {
IsBitNotEqual { left: InsnId, right: InsnId },
/// Convert a C `bool` to a Ruby `Qtrue`/`Qfalse`. Same as `RBOOL` macro.
BoxBool { val: InsnId },
+ /// Convert a C `long` to a Ruby `Fixnum`. Side exit on overflow.
+ BoxFixnum { val: InsnId, state: InsnId },
// TODO(max): In iseq body types that are not ISEQ_TYPE_METHOD, rewrite to Constant false.
Defined { op_type: usize, obj: VALUE, pushval: VALUE, v: InsnId, state: InsnId },
GetConstantPath { ic: *const iseq_inline_constant_cache, state: InsnId },
@@ -924,6 +927,7 @@ impl Insn {
Insn::NewRangeFixnum { .. } => false,
Insn::StringGetbyteFixnum { .. } => false,
Insn::IsBlockGiven => false,
+ Insn::BoxFixnum { .. } => false,
_ => true,
}
}
@@ -1057,6 +1061,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> {
Insn::IsBitEqual { left, right } => write!(f, "IsBitEqual {left}, {right}"),
Insn::IsBitNotEqual { left, right } => write!(f, "IsBitNotEqual {left}, {right}"),
Insn::BoxBool { val } => write!(f, "BoxBool {val}"),
+ Insn::BoxFixnum { val, .. } => write!(f, "BoxFixnum {val}"),
Insn::Jump(target) => { write!(f, "Jump {target}") }
Insn::IfTrue { val, target } => { write!(f, "IfTrue {val}, {target}") }
Insn::IfFalse { val, target } => { write!(f, "IfFalse {val}, {target}") }
@@ -1696,6 +1701,7 @@ impl Function {
&IsBitEqual { left, right } => IsBitEqual { left: find!(left), right: find!(right) },
&IsBitNotEqual { left, right } => IsBitNotEqual { left: find!(left), right: find!(right) },
&BoxBool { val } => BoxBool { val: find!(val) },
+ &BoxFixnum { val, state } => BoxFixnum { val: find!(val), state: find!(state) },
Jump(target) => Jump(find_branch_edge!(target)),
&IfTrue { val, ref target } => IfTrue { val: find!(val), target: find_branch_edge!(target) },
&IfFalse { val, ref target } => IfFalse { val: find!(val), target: find_branch_edge!(target) },
@@ -1888,6 +1894,7 @@ impl Function {
Insn::IsBitEqual { .. } => types::CBool,
Insn::IsBitNotEqual { .. } => types::CBool,
Insn::BoxBool { .. } => types::BoolExact,
+ Insn::BoxFixnum { .. } => types::Fixnum,
Insn::StringCopy { .. } => types::StringExact,
Insn::StringIntern { .. } => types::Symbol,
Insn::StringConcat { .. } => types::StringExact,
@@ -3281,7 +3288,8 @@ impl Function {
| &Insn::GuardNotFrozen { val, state }
| &Insn::ToArray { val, state }
| &Insn::IsMethodCfunc { val, state, .. }
- | &Insn::ToNewArray { val, state } => {
+ | &Insn::ToNewArray { val, state }
+ | &Insn::BoxFixnum { val, state } => {
worklist.push_back(val);
worklist.push_back(state);
}
@@ -3759,6 +3767,7 @@ impl Function {
}
}
Insn::BoxBool { val } => self.assert_subtype(insn_id, val, types::CBool),
+ Insn::BoxFixnum { val, .. } => self.assert_subtype(insn_id, val, types::CInt64),
Insn::SetGlobal { val, .. } => self.assert_subtype(insn_id, val, types::BasicObject),
Insn::GetIvar { self_val, .. } => self.assert_subtype(insn_id, self_val, types::BasicObject),
Insn::SetIvar { self_val, val, .. } => {
diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs
index d0b3203ac1..07f80c0682 100644
--- a/zjit/src/hir/opt_tests.rs
+++ b/zjit/src/hir/opt_tests.rs
@@ -2687,34 +2687,6 @@ mod hir_opt_tests {
}
#[test]
- fn string_bytesize_simple() {
- eval("
- def test = 'abc'.bytesize
- test
- test
- ");
- assert_snapshot!(hir_string("test"), @r"
- fn test@<compiled>:2:
- bb0():
- EntryPoint interpreter
- v1:BasicObject = LoadSelf
- Jump bb2(v1)
- bb1(v4:BasicObject):
- EntryPoint JIT(0)
- Jump bb2(v4)
- bb2(v6:BasicObject):
- v10:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000))
- v12:StringExact = StringCopy v10
- PatchPoint MethodRedefined(String@0x1008, bytesize@0x1010, cme:0x1018)
- PatchPoint NoSingletonClass(String@0x1008)
- IncrCounter inline_cfunc_optimized_send_count
- v24:Fixnum = CCall bytesize@0x1040, v12
- CheckInterrupts
- Return v24
- ");
- }
-
- #[test]
fn dont_replace_get_constant_path_with_empty_ic() {
eval("
def test = Kernel
@@ -7161,7 +7133,7 @@ mod hir_opt_tests {
}
#[test]
- fn test_specialize_string_bytesize() {
+ fn test_inline_string_bytesize() {
eval(r#"
def test(s)
s.bytesize
@@ -7182,8 +7154,9 @@ mod hir_opt_tests {
PatchPoint MethodRedefined(String@0x1000, bytesize@0x1008, cme:0x1010)
PatchPoint NoSingletonClass(String@0x1000)
v23:StringExact = GuardType v9, StringExact
+ v24:CInt64 = LoadField v23, :len@0x1038
+ v25:Fixnum = BoxFixnum v24
IncrCounter inline_cfunc_optimized_send_count
- v25:Fixnum = CCall bytesize@0x1038, v23
CheckInterrupts
Return v25
");
diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs
index 2d7139eec6..35af2b1d9d 100644
--- a/zjit/src/stats.rs
+++ b/zjit/src/stats.rs
@@ -139,6 +139,7 @@ make_counters! {
exit_fixnum_sub_overflow,
exit_fixnum_mult_overflow,
exit_fixnum_mod_by_zero,
+ exit_box_fixnum_overflow,
exit_guard_type_failure,
exit_guard_type_not_failure,
exit_guard_bit_equals_failure,
@@ -378,6 +379,7 @@ pub fn side_exit_counter(reason: crate::hir::SideExitReason) -> Counter {
FixnumSubOverflow => exit_fixnum_sub_overflow,
FixnumMultOverflow => exit_fixnum_mult_overflow,
FixnumModByZero => exit_fixnum_mod_by_zero,
+ BoxFixnumOverflow => exit_box_fixnum_overflow,
GuardType(_) => exit_guard_type_failure,
GuardTypeNot(_) => exit_guard_type_not_failure,
GuardBitEquals(_) => exit_guard_bit_equals_failure,