From 17cd9bffd37710dbcd746a62b3d49ea7213095ed Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Wed, 25 Mar 2026 09:35:56 -0400 Subject: ZJIT: Support negative array indices Previously we would side-exit if the index was negative. Instead, adjust the index to be in-bounds. --- zjit/src/codegen.rs | 9 ++++ zjit/src/cruby_methods.rs | 1 + zjit/src/hir.rs | 27 ++++++++++ zjit/src/hir/opt_tests.rs | 126 ++++++++++++++++++++++++++-------------------- 4 files changed, 109 insertions(+), 54 deletions(-) (limited to 'zjit') diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 178b122ee6..af6e881bd0 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -561,6 +561,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::NewRange { low, high, flag, state } => gen_new_range(jit, asm, opnd!(low), opnd!(high), *flag, &function.frame_state(*state)), Insn::NewRangeFixnum { low, high, flag, state } => gen_new_range_fixnum(asm, opnd!(low), opnd!(high), *flag, &function.frame_state(*state)), Insn::ArrayDup { val, state } => gen_array_dup(asm, opnd!(val), &function.frame_state(*state)), + Insn::AdjustBounds { index, length } => gen_adjust_bounds(asm, opnd!(index), opnd!(length)), Insn::ArrayAref { array, index, .. } => gen_array_aref(asm, opnd!(array), opnd!(index)), Insn::ArrayAset { array, index, val } => { no_output!(gen_array_aset(asm, opnd!(array), opnd!(index), opnd!(val))) @@ -1781,6 +1782,14 @@ fn gen_new_array( } } +/// Adjust potentially-negative index by the given length, returning the adjusted index. If still negative, +/// return a negative number, which indicates the index is still out-of-bounds. +fn gen_adjust_bounds(asm: &mut Assembler, index: Opnd, length: Opnd) -> lir::Opnd { + let adjusted = asm.add(index, length); + asm.test(index, index); + asm.csel_l(adjusted, index) +} + /// Compile array access (`array[index]`) fn gen_array_aref( asm: &mut Assembler, diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs index d39f380287..bd4310e084 100644 --- a/zjit/src/cruby_methods.rs +++ b/zjit/src/cruby_methods.rs @@ -354,6 +354,7 @@ fn inline_array_aref(fun: &mut hir::Function, block: hir::BlockId, recv: hir::In let index = fun.push_insn(block, hir::Insn::UnboxFixnum { val: index }); let length = fun.push_insn(block, hir::Insn::ArrayLength { array: recv }); let index = fun.push_insn(block, hir::Insn::GuardLess { left: index, right: length, state }); + let index = fun.push_insn(block, hir::Insn::AdjustBounds { index, length }); let zero = fun.push_insn(block, hir::Insn::Const { val: hir::Const::CInt64(0) }); use crate::hir::SideExitReason; let index = fun.push_insn(block, hir::Insn::GuardGreaterEq { left: index, right: zero, reason: SideExitReason::GuardGreaterEq, state }); diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index b14298c935..f2e0218117 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -806,6 +806,9 @@ pub enum Insn { ArrayPop { array: InsnId, state: InsnId }, /// Return the length of the array as a C `long` ([`types::CInt64`]) ArrayLength { array: InsnId }, + /// Adjust potentially-negative index by the given length, returning the adjusted index. If + /// still negative, return a negative number, which indicates the index is still out-of-bounds. + AdjustBounds { index: InsnId, length: InsnId }, HashAref { hash: InsnId, key: InsnId, state: InsnId }, HashAset { hash: InsnId, key: InsnId, val: InsnId, state: InsnId }, @@ -1270,6 +1273,10 @@ macro_rules! for_each_operand_impl { Insn::ArrayLength { array } => { $visit_one!(array); } + Insn::AdjustBounds { index, length } => { + $visit_one!(index); + $visit_one!(length); + } Insn::HashAref { hash, key, state } => { $visit_one!(hash); $visit_one!(key); @@ -1472,6 +1479,7 @@ impl Insn { Insn::ArrayAset { .. } => effects::Any, Insn::ArrayPop { .. } => effects::Any, Insn::ArrayLength { .. } => Effect::write(abstract_heaps::Empty), + Insn::AdjustBounds { .. } => effects::Empty, Insn::HashAref { .. } => effects::Any, Insn::HashAset { .. } => effects::Any, Insn::HashDup { .. } => allocates, @@ -1713,6 +1721,9 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::ArrayLength { array } => { write!(f, "ArrayLength {array}") } + Insn::AdjustBounds { index, length } => { + write!(f, "AdjustBounds {index}, {length}") + } Insn::NewHash { elements, .. } => { write!(f, "NewHash")?; let mut prefix = " "; @@ -2806,6 +2817,7 @@ impl Function { &ArrayAset { array, index, val } => ArrayAset { array: find!(array), index: find!(index), val: find!(val) }, &ArrayPop { array, state } => ArrayPop { array: find!(array), state: find!(state) }, &ArrayLength { array } => ArrayLength { array: find!(array) }, + &AdjustBounds { index, length } => AdjustBounds { index: find!(index), length: find!(length) }, &ArrayMax { ref elements, state } => ArrayMax { elements: find_vec!(elements), state: find!(state) }, &ArrayInclude { ref elements, target, state } => ArrayInclude { elements: find_vec!(elements), target: find!(target), state: find!(state) }, &ArrayPackBuffer { ref elements, fmt, buffer, state } => ArrayPackBuffer { elements: find_vec!(elements), fmt: find!(fmt), buffer: find!(buffer), state: find!(state) }, @@ -2923,6 +2935,7 @@ impl Function { Insn::ArrayAref { .. } => types::BasicObject, Insn::ArrayPop { .. } => types::BasicObject, Insn::ArrayLength { .. } => types::CInt64, + Insn::AdjustBounds { .. } => types::CInt64, Insn::HashAref { .. } => types::BasicObject, Insn::NewHash { .. } => types::HashExact, Insn::HashDup { .. } => types::HashExact, @@ -5281,6 +5294,16 @@ impl Function { _ => insn_id, } } + Insn::AdjustBounds { index, .. } => { + // If index is known nonnegative, then we don't need to adjust bounds. + if self.type_of(index).cint64_value().filter(|&i| i >= 0).is_some() { + self.make_equal_to(insn_id, index); + // Don't bother re-inferring the type of index; we already know it. + continue; + } else { + insn_id + } + } Insn::Test { val } if self.type_of(val).is_known_falsy() => { self.new_insn(Insn::Const { val: Const::CBool(false) }) } @@ -6063,6 +6086,10 @@ impl Function { self.assert_subtype(insn_id, array, types::ArrayExact)?; self.assert_subtype(insn_id, index, types::CInt64) } + Insn::AdjustBounds { index, length } => { + self.assert_subtype(insn_id, index, types::CInt64)?; + self.assert_subtype(insn_id, length, types::CInt64) + } // Instructions with Hash operands Insn::HashAref { hash, .. } | Insn::HashAset { hash, .. } => self.assert_subtype(insn_id, hash, types::HashExact), diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index fb9e92b5bf..db3889449f 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -1014,12 +1014,12 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Array@0x1008) PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018) v27:ArrayExact = GuardType v10, ArrayExact - v34:CInt64[0] = Const CInt64(0) + v35:CInt64[0] = Const CInt64(0) v29:CInt64 = ArrayLength v27 - v30:CInt64[0] = GuardLess v34, v29 - v33:BasicObject = ArrayAref v27, v30 + v30:CInt64[0] = GuardLess v35, v29 + v34:BasicObject = ArrayAref v27, v30 CheckInterrupts - Return v33 + Return v34 "); } @@ -1047,12 +1047,12 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Array@0x1008) PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018) v27:ArrayExact = GuardType v10, ArrayExact - v34:CInt64[0] = Const CInt64(0) + v35:CInt64[0] = Const CInt64(0) v29:CInt64 = ArrayLength v27 - v30:CInt64[0] = GuardLess v34, v29 - v33:BasicObject = ArrayAref v27, v30 + v30:CInt64[0] = GuardLess v35, v29 + v34:BasicObject = ArrayAref v27, v30 CheckInterrupts - Return v33 + Return v34 "); } @@ -1077,10 +1077,15 @@ mod hir_opt_tests { v13:Fixnum[-10] = Const Value(-10) PatchPoint NoSingletonClass(Array@0x1008) PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018) - v31:CInt64[-10] = Const CInt64(-10) + v32:CInt64[-10] = Const CInt64(-10) v26:CInt64 = ArrayLength v11 - v27:CInt64[-10] = GuardLess v31, v26 - SideExit GuardGreaterEq + v27:CInt64[-10] = GuardLess v32, v26 + v28:CInt64 = AdjustBounds v27, v26 + v29:CInt64[0] = Const CInt64(0) + v30:CInt64 = GuardGreaterEq v28, v29 + v31:BasicObject = ArrayAref v11, v30 + CheckInterrupts + Return v31 "); } @@ -2343,12 +2348,12 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Array@0x1008) PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018) v27:ArrayExact = GuardType v10, ArrayExact - v34:CInt64[0] = Const CInt64(0) + v35:CInt64[0] = Const CInt64(0) v29:CInt64 = ArrayLength v27 - v30:CInt64[0] = GuardLess v34, v29 - v33:BasicObject = ArrayAref v27, v30 + v30:CInt64[0] = GuardLess v35, v29 + v34:BasicObject = ArrayAref v27, v30 CheckInterrupts - Return v33 + Return v34 "); assert_snapshot!(inspect("test [1,2,3]"), @"1"); } @@ -6055,12 +6060,12 @@ mod hir_opt_tests { v12:Fixnum[0] = Const Value(0) PatchPoint NoSingletonClass(Array@0x1010) PatchPoint MethodRedefined(Array@0x1010, []@0x1018, cme:0x1020) - v33:CInt64[0] = Const CInt64(0) + v34:CInt64[0] = Const CInt64(0) v28:CInt64 = ArrayLength v23 - v29:CInt64[0] = GuardLess v33, v28 - v32:BasicObject = ArrayAref v23, v29 + v29:CInt64[0] = GuardLess v34, v28 + v33:BasicObject = ArrayAref v23, v29 CheckInterrupts - Return v32 + Return v33 "); // TODO(max): Check the result of `S[0] = 5; test` using `inspect` to make sure that we // actually do the load at run-time. @@ -6087,12 +6092,12 @@ mod hir_opt_tests { v13:Fixnum[1] = Const Value(1) PatchPoint NoSingletonClass(Array@0x1008) PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018) - v31:CInt64[1] = Const CInt64(1) + v32:CInt64[1] = Const CInt64(1) v26:CInt64 = ArrayLength v11 - v27:CInt64[1] = GuardLess v31, v26 - v32:Fixnum[5] = Const Value(5) + v27:CInt64[1] = GuardLess v32, v26 + v33:Fixnum[5] = Const Value(5) CheckInterrupts - Return v32 + Return v33 "); } @@ -6117,10 +6122,15 @@ mod hir_opt_tests { v13:Fixnum[-3] = Const Value(-3) PatchPoint NoSingletonClass(Array@0x1008) PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018) - v31:CInt64[-3] = Const CInt64(-3) + v32:CInt64[-3] = Const CInt64(-3) v26:CInt64 = ArrayLength v11 - v27:CInt64[-3] = GuardLess v31, v26 - SideExit GuardGreaterEq + v27:CInt64[-3] = GuardLess v32, v26 + v28:CInt64 = AdjustBounds v27, v26 + v29:CInt64[0] = Const CInt64(0) + v30:CInt64 = GuardGreaterEq v28, v29 + v31:BasicObject = ArrayAref v11, v30 + CheckInterrupts + Return v31 "); } @@ -6145,10 +6155,15 @@ mod hir_opt_tests { v13:Fixnum[-10] = Const Value(-10) PatchPoint NoSingletonClass(Array@0x1008) PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018) - v31:CInt64[-10] = Const CInt64(-10) + v32:CInt64[-10] = Const CInt64(-10) v26:CInt64 = ArrayLength v11 - v27:CInt64[-10] = GuardLess v31, v26 - SideExit GuardGreaterEq + v27:CInt64[-10] = GuardLess v32, v26 + v28:CInt64 = AdjustBounds v27, v26 + v29:CInt64[0] = Const CInt64(0) + v30:CInt64 = GuardGreaterEq v28, v29 + v31:BasicObject = ArrayAref v11, v30 + CheckInterrupts + Return v31 "); } @@ -6173,12 +6188,12 @@ mod hir_opt_tests { v13:Fixnum[10] = Const Value(10) PatchPoint NoSingletonClass(Array@0x1008) PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018) - v31:CInt64[10] = Const CInt64(10) + v32:CInt64[10] = Const CInt64(10) v26:CInt64 = ArrayLength v11 - v27:CInt64[10] = GuardLess v31, v26 - v32:NilClass = Const Value(nil) + v27:CInt64[10] = GuardLess v32, v26 + v33:NilClass = Const Value(nil) CheckInterrupts - Return v32 + Return v33 "); } @@ -8665,12 +8680,12 @@ mod hir_opt_tests { v19:Fixnum[0] = Const Value(0) PatchPoint NoSingletonClass(Array@0x1008) PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018) - v37:CInt64[0] = Const CInt64(0) + v38:CInt64[0] = Const CInt64(0) v32:CInt64 = ArrayLength v14 - v33:CInt64[0] = GuardLess v37, v32 - v36:BasicObject = ArrayAref v14, v33 + v33:CInt64[0] = GuardLess v38, v32 + v37:BasicObject = ArrayAref v14, v33 CheckInterrupts - Return v36 + Return v37 "); } @@ -8705,11 +8720,12 @@ mod hir_opt_tests { v31:CInt64 = UnboxFixnum v30 v32:CInt64 = ArrayLength v29 v33:CInt64 = GuardLess v31, v32 - v34:CInt64[0] = Const CInt64(0) - v35:CInt64 = GuardGreaterEq v33, v34 - v36:BasicObject = ArrayAref v29, v35 + v34:CInt64 = AdjustBounds v33, v32 + v35:CInt64[0] = Const CInt64(0) + v36:CInt64 = GuardGreaterEq v34, v35 + v37:BasicObject = ArrayAref v29, v36 CheckInterrupts - Return v36 + Return v37 "); } @@ -8745,11 +8761,12 @@ mod hir_opt_tests { v31:CInt64 = UnboxFixnum v30 v32:CInt64 = ArrayLength v29 v33:CInt64 = GuardLess v31, v32 - v34:CInt64[0] = Const CInt64(0) - v35:CInt64 = GuardGreaterEq v33, v34 - v36:BasicObject = ArrayAref v29, v35 + v34:CInt64 = AdjustBounds v33, v32 + v35:CInt64[0] = Const CInt64(0) + v36:CInt64 = GuardGreaterEq v34, v35 + v37:BasicObject = ArrayAref v29, v36 CheckInterrupts - Return v36 + Return v37 "); } @@ -9340,21 +9357,22 @@ mod hir_opt_tests { v25:CallableMethodEntry[VALUE(0x1048)] = GuardBitEquals v24, Value(VALUE(0x1048)) v26:RubyValue = LoadField v23, :_ep_specval@0x1050 v27:FalseClass = GuardBitEquals v26, Value(false) - v37:CPtr = GetEP 0 - v38:RubyValue = LoadField v37, :_ep_method_entry@0x1040 - v39:CallableMethodEntry[VALUE(0x1048)] = GuardBitEquals v38, Value(VALUE(0x1048)) - v40:RubyValue = LoadField v37, :_ep_specval@0x1050 - v41:FalseClass = GuardBitEquals v40, Value(false) + v38:CPtr = GetEP 0 + v39:RubyValue = LoadField v38, :_ep_method_entry@0x1040 + v40:CallableMethodEntry[VALUE(0x1048)] = GuardBitEquals v39, Value(VALUE(0x1048)) + v41:RubyValue = LoadField v38, :_ep_specval@0x1050 + v42:FalseClass = GuardBitEquals v41, Value(false) v28:Array = GuardType v9, Array v29:Fixnum = GuardType v10, Fixnum v30:CInt64 = UnboxFixnum v29 v31:CInt64 = ArrayLength v28 v32:CInt64 = GuardLess v30, v31 - v33:CInt64[0] = Const CInt64(0) - v34:CInt64 = GuardGreaterEq v32, v33 - v35:BasicObject = ArrayAref v28, v34 + v33:CInt64 = AdjustBounds v32, v31 + v34:CInt64[0] = Const CInt64(0) + v35:CInt64 = GuardGreaterEq v33, v34 + v36:BasicObject = ArrayAref v28, v35 CheckInterrupts - Return v35 + Return v36 "); } -- cgit v1.2.3