diff options
| author | Daniel Colson <danieljamescolson@gmail.com> | 2025-08-19 10:02:13 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-19 10:02:13 -0400 |
| commit | fc5ee247d5307a292cd2b083ce82fc24005bb385 (patch) | |
| tree | d3d6e0514685e9d258167d203f07bbbeddb87475 /zjit/src | |
| parent | 6281806fc6dee87ec39a545ce0a157d740446674 (diff) | |
ZJIT: Compile toregexp (#14200)
`toregexp` is fairly similar to `concatstrings`, so this commit extracts
helpers (`gen_push_opnds` and `gen_pop_opnds`) for pushing and popping
operands on the native stack.
There's probably an opportunity to move some of this into `lir` (e.g. Alan
suggested a `push_many` that could use STP on ARM to push two values at a
time), but I might save that for another day.
Diffstat (limited to 'zjit/src')
| -rw-r--r-- | zjit/src/codegen.rs | 41 | ||||
| -rw-r--r-- | zjit/src/cruby_bindings.inc.rs | 5 | ||||
| -rw-r--r-- | zjit/src/hir.rs | 89 |
3 files changed, 126 insertions, 9 deletions
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 5780a26357..37f10c92eb 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -346,6 +346,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::StringConcat { strings, .. } if strings.is_empty() => return None, Insn::StringConcat { strings, state } => gen_string_concat(jit, asm, opnds!(strings), &function.frame_state(*state)), Insn::StringIntern { val, state } => gen_intern(asm, opnd!(val), &function.frame_state(*state)), + Insn::ToRegexp { opt, values, state } => gen_toregexp(jit, asm, *opt, opnds!(values), &function.frame_state(*state)), Insn::Param { idx } => unreachable!("block.insns should not have Insn::Param({idx})"), Insn::Snapshot { .. } => return Some(()), // we don't need to do anything for this instruction at the moment Insn::Jump(branch) => no_output!(gen_jump(jit, asm, branch)), @@ -1595,36 +1596,58 @@ pub fn gen_exit_trampoline(cb: &mut CodeBlock) -> Option<CodePtr> { }) } -fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec<Opnd>, state: &FrameState) -> Opnd { - gen_prepare_non_leaf_call(jit, asm, state); +fn gen_push_opnds(jit: &mut JITState, asm: &mut Assembler, opnds: &[Opnd]) -> lir::Opnd { + let n = opnds.len(); // Calculate the compile-time NATIVE_STACK_PTR offset from NATIVE_BASE_PTR // At this point, frame_setup(&[], jit.c_stack_slots) has been called, // which allocated aligned_stack_bytes(jit.c_stack_slots) on the stack let frame_size = aligned_stack_bytes(jit.c_stack_slots); - let n = strings.len(); let allocation_size = aligned_stack_bytes(n); - asm_comment!(asm, "allocate {} bytes on C stack for {} strings", allocation_size, n); + asm_comment!(asm, "allocate {} bytes on C stack for {} values", allocation_size, n); asm.sub_into(NATIVE_STACK_PTR, allocation_size.into()); // Calculate the total offset from NATIVE_BASE_PTR to our buffer let total_offset_from_base = (frame_size + allocation_size) as i32; - for 
(idx, &string_opnd) in strings.iter().enumerate() { + for (idx, &opnd) in opnds.iter().enumerate() { let slot_offset = -total_offset_from_base + (idx as i32 * SIZEOF_VALUE_I32); asm.mov( Opnd::mem(VALUE_BITS, NATIVE_BASE_PTR, slot_offset), - string_opnd + opnd ); } - let first_string_ptr = asm.lea(Opnd::mem(64, NATIVE_BASE_PTR, -total_offset_from_base)); - - let result = asm_ccall!(asm, rb_str_concat_literals, n.into(), first_string_ptr); + asm.lea(Opnd::mem(64, NATIVE_BASE_PTR, -total_offset_from_base)) +} +fn gen_pop_opnds(asm: &mut Assembler, opnds: &[Opnd]) { asm_comment!(asm, "restore C stack pointer"); + let allocation_size = aligned_stack_bytes(opnds.len()); asm.add_into(NATIVE_STACK_PTR, allocation_size.into()); +} + +fn gen_toregexp(jit: &mut JITState, asm: &mut Assembler, opt: usize, values: Vec<Opnd>, state: &FrameState) -> Opnd { + gen_prepare_non_leaf_call(jit, asm, state); + + let first_opnd_ptr = gen_push_opnds(jit, asm, &values); + + let tmp_ary = asm_ccall!(asm, rb_ary_tmp_new_from_values, Opnd::Imm(0), values.len().into(), first_opnd_ptr); + let result = asm_ccall!(asm, rb_reg_new_ary, tmp_ary, opt.into()); + asm_ccall!(asm, rb_ary_clear, tmp_ary); + + gen_pop_opnds(asm, &values); + + result +} + +fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec<Opnd>, state: &FrameState) -> Opnd { + gen_prepare_non_leaf_call(jit, asm, state); + + let first_string_ptr = gen_push_opnds(jit, asm, &strings); + let result = asm_ccall!(asm, rb_str_concat_literals, strings.len().into(), first_string_ptr); + gen_pop_opnds(asm, &strings); result } diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 5c939fabe7..524b06b580 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -30,6 +30,11 @@ impl<T> ::std::fmt::Debug for __IncompleteArrayField<T> { fmt.write_str("__IncompleteArrayField") } } +pub const ONIG_OPTION_IGNORECASE: u32 = 1; +pub const ONIG_OPTION_EXTEND: u32 = 2; +pub const 
ONIG_OPTION_MULTILINE: u32 = 4; +pub const ARG_ENCODING_FIXED: u32 = 16; +pub const ARG_ENCODING_NONE: u32 = 32; pub const INTEGER_REDEFINED_OP_FLAG: u32 = 1; pub const FLOAT_REDEFINED_OP_FLAG: u32 = 2; pub const STRING_REDEFINED_OP_FLAG: u32 = 4; diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index afe358ec1d..7c7e09663b 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -473,6 +473,9 @@ pub enum Insn { StringIntern { val: InsnId, state: InsnId }, StringConcat { strings: Vec<InsnId>, state: InsnId }, + /// Combine count stack values into a regexp + ToRegexp { opt: usize, values: Vec<InsnId>, state: InsnId }, + /// Put special object (VMCORE, CBASE, etc.) based on value_type PutSpecialObject { value_type: SpecialObjectType }, @@ -668,6 +671,14 @@ pub struct InsnPrinter<'a> { ptr_map: &'a PtrPrintMap, } +static REGEXP_FLAGS: &[(u32, &str)] = &[ + (ONIG_OPTION_MULTILINE, "MULTILINE"), + (ONIG_OPTION_IGNORECASE, "IGNORECASE"), + (ONIG_OPTION_EXTEND, "EXTENDED"), + (ARG_ENCODING_FIXED, "FIXEDENCODING"), + (ARG_ENCODING_NONE, "NOENCODING"), +]; + impl<'a> std::fmt::Display for InsnPrinter<'a> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match &self.inner { @@ -716,6 +727,28 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Ok(()) } + Insn::ToRegexp { values, opt, .. 
} => { + write!(f, "ToRegexp")?; + let mut prefix = " "; + for value in values { + write!(f, "{prefix}{value}")?; + prefix = ", "; + } + + let opt = *opt as u32; + if opt != 0 { + write!(f, ", ")?; + let mut sep = ""; + for (flag, name) in REGEXP_FLAGS { + if opt & flag != 0 { + write!(f, "{sep}{name}")?; + sep = "|"; + } + } + } + + Ok(()) + } Insn::Test { val } => { write!(f, "Test {val}") } Insn::IsNil { val } => { write!(f, "IsNil {val}") } Insn::Jump(target) => { write!(f, "Jump {target}") } @@ -1179,6 +1212,7 @@ impl Function { &StringCopy { val, chilled, state } => StringCopy { val: find!(val), chilled, state }, &StringIntern { val, state } => StringIntern { val: find!(val), state: find!(state) }, &StringConcat { ref strings, state } => StringConcat { strings: find_vec!(strings), state: find!(state) }, + &ToRegexp { opt, ref values, state } => ToRegexp { opt, values: find_vec!(values), state }, &Test { val } => Test { val: find!(val) }, &IsNil { val } => IsNil { val: find!(val) }, &Jump(ref target) => Jump(find_branch_edge!(target)), @@ -1305,6 +1339,7 @@ impl Function { Insn::StringCopy { .. } => types::StringExact, Insn::StringIntern { .. } => types::Symbol, Insn::StringConcat { .. } => types::StringExact, + Insn::ToRegexp { .. } => types::RegexpExact, Insn::NewArray { .. } => types::ArrayExact, Insn::ArrayDup { .. } => types::ArrayExact, Insn::NewHash { .. } => types::HashExact, @@ -1939,6 +1974,10 @@ impl Function { worklist.extend(strings); worklist.push_back(state); } + &Insn::ToRegexp { ref values, state, .. } => { + worklist.extend(values); + worklist.push_back(state); + } | &Insn::Return { val } | &Insn::Throw { val, .. 
} | &Insn::Test { val } @@ -2863,6 +2902,15 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> { let insn_id = fun.push_insn(block, Insn::StringConcat { strings, state: exit_id }); state.stack_push(insn_id); } + YARVINSN_toregexp => { + // First arg contains the options (multiline, extended, ignorecase) used to create the regexp + let opt = get_arg(pc, 0).as_usize(); + let count = get_arg(pc, 1).as_usize(); + let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); + let values = state.stack_pop_n(count)?; + let insn_id = fun.push_insn(block, Insn::ToRegexp { opt, values, state: exit_id }); + state.stack_push(insn_id); + } YARVINSN_newarray => { let count = get_arg(pc, 0).as_usize(); let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); @@ -5331,6 +5379,47 @@ mod tests { } #[test] + fn test_toregexp() { + eval(r##" + def test = /#{1}#{2}#{3}/ + "##); + assert_method_hir_with_opcode("test", YARVINSN_toregexp, expect![[r#" + fn test@<compiled>:2: + bb0(v0:BasicObject): + v2:Fixnum[1] = Const Value(1) + v4:BasicObject = ObjToString v2 + v6:String = AnyToString v2, str: v4 + v7:Fixnum[2] = Const Value(2) + v9:BasicObject = ObjToString v7 + v11:String = AnyToString v7, str: v9 + v12:Fixnum[3] = Const Value(3) + v14:BasicObject = ObjToString v12 + v16:String = AnyToString v12, str: v14 + v18:RegexpExact = ToRegexp v6, v11, v16 + Return v18 + "#]]); + } + + #[test] + fn test_toregexp_with_options() { + eval(r##" + def test = /#{1}#{2}/mixn + "##); + assert_method_hir_with_opcode("test", YARVINSN_toregexp, expect![[r#" + fn test@<compiled>:2: + bb0(v0:BasicObject): + v2:Fixnum[1] = Const Value(1) + v4:BasicObject = ObjToString v2 + v6:String = AnyToString v2, str: v4 + v7:Fixnum[2] = Const Value(2) + v9:BasicObject = ObjToString v7 + v11:String = AnyToString v7, str: v9 + v13:RegexpExact = ToRegexp v6, v11, MULTILINE|IGNORECASE|EXTENDED|NOENCODING + Return v13 + "#]]); + } + + #[test] fn throw() { eval(" 
define_method(:throw_return) { return 1 } |
