diff options
| author | Aaron Patterson <tenderlove@ruby-lang.org> | 2026-05-08 14:23:18 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-05-08 14:23:18 -0700 |
| commit | e9e4647e6667743e26db037beaa6a56bc8c70f48 (patch) | |
| tree | 620fe6fe87a73e08520e8ccabda543531fd46e9d | |
| parent | 12bb8955263bb8cb82bdcd642556ab360cbb7b12 (diff) | |
ZJIT: add an unreachable instruction (#16901)
Unreachable instructions terminate blocks. We'll use this mostly in
tests as a terminator instruction (since traditional basic blocks will
require all blocks to end with a terminator).
| -rw-r--r-- | zjit/src/asm/arm64/inst/mod.rs | 2 | ||||
| -rw-r--r-- | zjit/src/asm/arm64/inst/udf.rs | 52 | ||||
| -rw-r--r-- | zjit/src/asm/arm64/mod.rs | 6 | ||||
| -rw-r--r-- | zjit/src/backend/arm64/mod.rs | 3 | ||||
| -rw-r--r-- | zjit/src/backend/lir.rs | 13 | ||||
| -rw-r--r-- | zjit/src/backend/x86_64/mod.rs | 1 | ||||
| -rw-r--r-- | zjit/src/codegen.rs | 1 | ||||
| -rw-r--r-- | zjit/src/hir.rs | 16 |
8 files changed, 87 insertions, 7 deletions
diff --git a/zjit/src/asm/arm64/inst/mod.rs b/zjit/src/asm/arm64/inst/mod.rs index bfffd914ef..270c784f27 100644 --- a/zjit/src/asm/arm64/inst/mod.rs +++ b/zjit/src/asm/arm64/inst/mod.rs @@ -26,6 +26,7 @@ mod sbfm; mod shift_imm; mod sys_reg; mod test_bit; +mod udf; pub use atomic::Atomic; pub use branch::Branch; @@ -52,3 +53,4 @@ pub use sbfm::SBFM; pub use shift_imm::ShiftImm; pub use sys_reg::SysReg; pub use test_bit::TestBit; +pub use udf::Udf; diff --git a/zjit/src/asm/arm64/inst/udf.rs b/zjit/src/asm/arm64/inst/udf.rs new file mode 100644 index 0000000000..297d17ed62 --- /dev/null +++ b/zjit/src/asm/arm64/inst/udf.rs @@ -0,0 +1,52 @@ +/// The struct that represents an A64 permanently undefined instruction. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | +/// | imm16..................................................| +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Udf { + /// The immediate value encoded in the instruction + imm16: u16 +} + +impl Udf { + /// UDF - Permanently Undefined + /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UDF--Permanently-Undefined-> + pub fn udf(imm16: u16) -> Self { + Self { imm16 } + } +} + +impl From<Udf> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Udf) -> Self { + inst.imm16 as u32 + } +} + +impl From<Udf> for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: Udf) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_udf() { + let result: u32 = Udf::udf(0).into(); + assert_eq!(0x00000000, result); + } + + #[test] + fn test_udf_imm() { + let result: u32 = Udf::udf(1).into(); + assert_eq!(0x00000001, result); + } +} diff --git a/zjit/src/asm/arm64/mod.rs b/zjit/src/asm/arm64/mod.rs index a360d7738b..b53f1cf673 100644 --- a/zjit/src/asm/arm64/mod.rs +++ b/zjit/src/asm/arm64/mod.rs @@ -321,6 +321,12 @@ pub fn brk(cb: &mut CodeBlock, imm16: A64Opnd) { cb.write_bytes(&bytes); } +/// UDF - permanently undefined instruction +pub fn udf(cb: &mut CodeBlock, imm16: u16) { + let bytes: [u8; 4] = Udf::udf(imm16).into(); + cb.write_bytes(&bytes); +} + /// CMP - compare rn and rm, update flags pub fn cmp(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { let bytes: [u8; 4] = match (rn, rm) { diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index 54c803168d..4d7aa2c953 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -1561,6 +1561,9 @@ impl Assembler { Insn::Breakpoint => { brk(cb, A64Opnd::None); }, + Insn::Abort => { + udf(cb, u16::MAX); + }, Insn::CSelZ { truthy, falsy, out } | Insn::CSelE { truthy, falsy, out } => { csel(cb, out.into(), truthy.into(), falsy.into(), Condition::EQ); diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index bb8d1e1e73..7335680f84 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -653,6 +653,9 @@ pub enum Insn { #[allow(dead_code)] Breakpoint, + // Abort the process + Abort, + /// Add a comment into the IR at the point that this instruction is added. /// It won't have any impact on that actual compiled code. Comment(String), @@ -895,6 +898,7 @@ impl Insn { Insn::And { .. 
} => "And", Insn::BakeString(_) => "BakeString", Insn::Breakpoint => "Breakpoint", + Insn::Abort => "Abort", Insn::Comment(_) => "Comment", Insn::Cmp { .. } => "Cmp", Insn::CPop { .. } => "CPop", @@ -1185,7 +1189,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> { } Insn::BakeString(_) | - Insn::Breakpoint | + Insn::Breakpoint | Insn::Abort | Insn::Comment(_) | Insn::CPop { .. } | Insn::PadPatchPoint | @@ -1363,7 +1367,7 @@ impl<'a> InsnOpndMutIterator<'a> { } Insn::BakeString(_) | - Insn::Breakpoint | + Insn::Breakpoint | Insn::Abort | Insn::Comment(_) | Insn::CPop { .. } | Insn::FrameSetup { .. } | @@ -3465,6 +3469,11 @@ impl Assembler { self.push_insn(Insn::Breakpoint); } + #[allow(dead_code)] + pub fn abort(&mut self) { + self.push_insn(Insn::Abort); + } + /// Call a C function without PosMarkers pub fn ccall(&mut self, fptr: *const u8, opnds: Vec<Opnd>) -> Opnd { let canary_opnd = self.set_stack_canary(); diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index 3904bfd71f..a3af9856da 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -1089,6 +1089,7 @@ impl Assembler { }, Insn::Breakpoint => int3(cb), + Insn::Abort => ud2(cb), Insn::CSelZ { truthy, falsy, out } => { emit_csel(cb, *truthy, *falsy, *out, cmovz, cmovnz); diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index b9b8b6509a..097257ddf8 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -754,6 +754,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::ObjToString { val, cd, state, .. 
} => gen_objtostring(jit, asm, opnd!(val), *cd, &function.frame_state(*state)), &Insn::CheckInterrupts { state } => no_output!(gen_check_interrupts(jit, asm, &function.frame_state(state))), Insn::BreakPoint => no_output!(asm.breakpoint()), + Insn::Unreachable => no_output!(asm.abort()), &Insn::HashDup { val, state } => { gen_hash_dup(asm, opnd!(val), &function.frame_state(state)) }, &Insn::HashAref { hash, key, state } => { gen_hash_aref(jit, asm, opnd!(hash), opnd!(key), &function.frame_state(state)) }, &Insn::HashAset { hash, key, val, state } => { no_output!(gen_hash_aset(jit, asm, opnd!(hash), opnd!(key), opnd!(val), &function.frame_state(state))) }, diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 27bf5df427..1d8358cbad 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -1147,6 +1147,11 @@ pub enum Insn { CheckInterrupts { state: InsnId }, BreakPoint, + + /// Only use this instruction in tests where you need to end a block with + /// a terminator, but don't ever expect the code to be executed. This + /// instruction should never be generated from iseq_to_hir + Unreachable, } /// Macro that enumerates all operands of an Insn, dispatching to caller-provided @@ -1165,7 +1170,7 @@ macro_rules! for_each_operand_impl { | Insn::LoadEC | Insn::GetEP { .. } | Insn::LoadSelf - | Insn::BreakPoint + | Insn::BreakPoint | Insn::Unreachable | Insn::PutSpecialObject { .. } | Insn::IncrCounter(_) | Insn::IncrCounterPtr { .. } => {} @@ -1471,7 +1476,7 @@ impl Insn { | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::SetClassVar { .. } | Insn::ArrayExtend { .. } | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetGlobal { .. } | Insn::SetLocal { .. } | Insn::Throw { .. } | Insn::IncrCounter(_) | Insn::IncrCounterPtr { .. } - | Insn::CheckInterrupts { .. } | Insn::BreakPoint + | Insn::CheckInterrupts { .. } | Insn::BreakPoint | Insn::Unreachable | Insn::StoreField { .. } | Insn::WriteBarrier { .. } | Insn::HashAset { .. } | Insn::ArrayAset { .. 
} => false, _ => true, @@ -1698,7 +1703,7 @@ impl Insn { abstract_heaps::Control ), Insn::Entries { .. } => effects::Any, - Insn::BreakPoint => Effect::read_write(abstract_heaps::Empty, abstract_heaps::Control), + Insn::BreakPoint | Insn::Unreachable => Effect::read_write(abstract_heaps::Empty, abstract_heaps::Control), } } @@ -2223,6 +2228,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::CheckInterrupts { .. } => write!(f, "CheckInterrupts"), Insn::IsA { val, class } => write!(f, "IsA {val}, {class}"), Insn::BreakPoint => write!(f, "BreakPoint"), + Insn::Unreachable => write!(f, "Unreachable"), } } } @@ -2837,7 +2843,7 @@ impl Function { | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::SetClassVar { .. } | Insn::ArrayExtend { .. } | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetLocal { .. } | Insn::IncrCounter(_) | Insn::IncrCounterPtr { .. } - | Insn::CheckInterrupts { .. } | Insn::BreakPoint + | Insn::CheckInterrupts { .. } | Insn::BreakPoint | Insn::Unreachable | Insn::StoreField { .. } | Insn::WriteBarrier { .. } | Insn::HashAset { .. } | Insn::ArrayAset { .. } => panic!("Cannot infer type of instruction with no output: {}. See Insn::has_output().", self.insns[insn.0]), Insn::Const { val: Const::Value(val) } => Type::from_value(*val), @@ -5810,7 +5816,7 @@ impl Function { | Insn::LoadSP | Insn::LoadEC | Insn::GetEP { .. } - | Insn::BreakPoint + | Insn::BreakPoint | Insn::Unreachable | Insn::LoadSelf | Insn::Snapshot { .. } | Insn::Jump { .. } |
