summaryrefslogtreecommitdiff
path: root/zjit/src
diff options
context:
space:
mode:
authorDaniel Colson <danieljamescolson@gmail.com>2025-08-19 10:02:13 -0400
committerGitHub <noreply@github.com>2025-08-19 10:02:13 -0400
commitfc5ee247d5307a292cd2b083ce82fc24005bb385 (patch)
treed3d6e0514685e9d258167d203f07bbbeddb87475 /zjit/src
parent6281806fc6dee87ec39a545ce0a157d740446674 (diff)
ZJIT: Compile toregexp (#14200)
`toregexp` is fairly similar to `concatstrings`, so this commit extracts a helper for pushing and popping operands on the native stack. There's probably opportunity to move some of this into lir (e.g. Alan suggested a push_many that could use STP on ARM to push 2 at a time), but I might save that for another day.
Diffstat (limited to 'zjit/src')
-rw-r--r--zjit/src/codegen.rs41
-rw-r--r--zjit/src/cruby_bindings.inc.rs5
-rw-r--r--zjit/src/hir.rs89
3 files changed, 126 insertions, 9 deletions
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index 5780a26357..37f10c92eb 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -346,6 +346,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
Insn::StringConcat { strings, .. } if strings.is_empty() => return None,
Insn::StringConcat { strings, state } => gen_string_concat(jit, asm, opnds!(strings), &function.frame_state(*state)),
Insn::StringIntern { val, state } => gen_intern(asm, opnd!(val), &function.frame_state(*state)),
+ Insn::ToRegexp { opt, values, state } => gen_toregexp(jit, asm, *opt, opnds!(values), &function.frame_state(*state)),
Insn::Param { idx } => unreachable!("block.insns should not have Insn::Param({idx})"),
Insn::Snapshot { .. } => return Some(()), // we don't need to do anything for this instruction at the moment
Insn::Jump(branch) => no_output!(gen_jump(jit, asm, branch)),
@@ -1595,36 +1596,58 @@ pub fn gen_exit_trampoline(cb: &mut CodeBlock) -> Option<CodePtr> {
})
}
-fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec<Opnd>, state: &FrameState) -> Opnd {
- gen_prepare_non_leaf_call(jit, asm, state);
+fn gen_push_opnds(jit: &mut JITState, asm: &mut Assembler, opnds: &[Opnd]) -> lir::Opnd {
+ let n = opnds.len();
// Calculate the compile-time NATIVE_STACK_PTR offset from NATIVE_BASE_PTR
// At this point, frame_setup(&[], jit.c_stack_slots) has been called,
// which allocated aligned_stack_bytes(jit.c_stack_slots) on the stack
let frame_size = aligned_stack_bytes(jit.c_stack_slots);
- let n = strings.len();
let allocation_size = aligned_stack_bytes(n);
- asm_comment!(asm, "allocate {} bytes on C stack for {} strings", allocation_size, n);
+ asm_comment!(asm, "allocate {} bytes on C stack for {} values", allocation_size, n);
asm.sub_into(NATIVE_STACK_PTR, allocation_size.into());
// Calculate the total offset from NATIVE_BASE_PTR to our buffer
let total_offset_from_base = (frame_size + allocation_size) as i32;
- for (idx, &string_opnd) in strings.iter().enumerate() {
+ for (idx, &opnd) in opnds.iter().enumerate() {
let slot_offset = -total_offset_from_base + (idx as i32 * SIZEOF_VALUE_I32);
asm.mov(
Opnd::mem(VALUE_BITS, NATIVE_BASE_PTR, slot_offset),
- string_opnd
+ opnd
);
}
- let first_string_ptr = asm.lea(Opnd::mem(64, NATIVE_BASE_PTR, -total_offset_from_base));
-
- let result = asm_ccall!(asm, rb_str_concat_literals, n.into(), first_string_ptr);
+ asm.lea(Opnd::mem(64, NATIVE_BASE_PTR, -total_offset_from_base))
+}
+fn gen_pop_opnds(asm: &mut Assembler, opnds: &[Opnd]) {
asm_comment!(asm, "restore C stack pointer");
+ let allocation_size = aligned_stack_bytes(opnds.len());
asm.add_into(NATIVE_STACK_PTR, allocation_size.into());
+}
+
+fn gen_toregexp(jit: &mut JITState, asm: &mut Assembler, opt: usize, values: Vec<Opnd>, state: &FrameState) -> Opnd {
+ gen_prepare_non_leaf_call(jit, asm, state);
+
+ let first_opnd_ptr = gen_push_opnds(jit, asm, &values);
+
+ let tmp_ary = asm_ccall!(asm, rb_ary_tmp_new_from_values, Opnd::Imm(0), values.len().into(), first_opnd_ptr);
+ let result = asm_ccall!(asm, rb_reg_new_ary, tmp_ary, opt.into());
+ asm_ccall!(asm, rb_ary_clear, tmp_ary);
+
+ gen_pop_opnds(asm, &values);
+
+ result
+}
+
+fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec<Opnd>, state: &FrameState) -> Opnd {
+ gen_prepare_non_leaf_call(jit, asm, state);
+
+ let first_string_ptr = gen_push_opnds(jit, asm, &strings);
+ let result = asm_ccall!(asm, rb_str_concat_literals, strings.len().into(), first_string_ptr);
+ gen_pop_opnds(asm, &strings);
result
}
diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs
index 5c939fabe7..524b06b580 100644
--- a/zjit/src/cruby_bindings.inc.rs
+++ b/zjit/src/cruby_bindings.inc.rs
@@ -30,6 +30,11 @@ impl<T> ::std::fmt::Debug for __IncompleteArrayField<T> {
fmt.write_str("__IncompleteArrayField")
}
}
+pub const ONIG_OPTION_IGNORECASE: u32 = 1;
+pub const ONIG_OPTION_EXTEND: u32 = 2;
+pub const ONIG_OPTION_MULTILINE: u32 = 4;
+pub const ARG_ENCODING_FIXED: u32 = 16;
+pub const ARG_ENCODING_NONE: u32 = 32;
pub const INTEGER_REDEFINED_OP_FLAG: u32 = 1;
pub const FLOAT_REDEFINED_OP_FLAG: u32 = 2;
pub const STRING_REDEFINED_OP_FLAG: u32 = 4;
diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs
index afe358ec1d..7c7e09663b 100644
--- a/zjit/src/hir.rs
+++ b/zjit/src/hir.rs
@@ -473,6 +473,9 @@ pub enum Insn {
StringIntern { val: InsnId, state: InsnId },
StringConcat { strings: Vec<InsnId>, state: InsnId },
+ /// Combine count stack values into a regexp
+ ToRegexp { opt: usize, values: Vec<InsnId>, state: InsnId },
+
/// Put special object (VMCORE, CBASE, etc.) based on value_type
PutSpecialObject { value_type: SpecialObjectType },
@@ -668,6 +671,14 @@ pub struct InsnPrinter<'a> {
ptr_map: &'a PtrPrintMap,
}
+static REGEXP_FLAGS: &[(u32, &str)] = &[
+ (ONIG_OPTION_MULTILINE, "MULTILINE"),
+ (ONIG_OPTION_IGNORECASE, "IGNORECASE"),
+ (ONIG_OPTION_EXTEND, "EXTENDED"),
+ (ARG_ENCODING_FIXED, "FIXEDENCODING"),
+ (ARG_ENCODING_NONE, "NOENCODING"),
+];
+
impl<'a> std::fmt::Display for InsnPrinter<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match &self.inner {
@@ -716,6 +727,28 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> {
Ok(())
}
+ Insn::ToRegexp { values, opt, .. } => {
+ write!(f, "ToRegexp")?;
+ let mut prefix = " ";
+ for value in values {
+ write!(f, "{prefix}{value}")?;
+ prefix = ", ";
+ }
+
+ let opt = *opt as u32;
+ if opt != 0 {
+ write!(f, ", ")?;
+ let mut sep = "";
+ for (flag, name) in REGEXP_FLAGS {
+ if opt & flag != 0 {
+ write!(f, "{sep}{name}")?;
+ sep = "|";
+ }
+ }
+ }
+
+ Ok(())
+ }
Insn::Test { val } => { write!(f, "Test {val}") }
Insn::IsNil { val } => { write!(f, "IsNil {val}") }
Insn::Jump(target) => { write!(f, "Jump {target}") }
@@ -1179,6 +1212,7 @@ impl Function {
&StringCopy { val, chilled, state } => StringCopy { val: find!(val), chilled, state },
&StringIntern { val, state } => StringIntern { val: find!(val), state: find!(state) },
&StringConcat { ref strings, state } => StringConcat { strings: find_vec!(strings), state: find!(state) },
+ &ToRegexp { opt, ref values, state } => ToRegexp { opt, values: find_vec!(values), state },
&Test { val } => Test { val: find!(val) },
&IsNil { val } => IsNil { val: find!(val) },
&Jump(ref target) => Jump(find_branch_edge!(target)),
@@ -1305,6 +1339,7 @@ impl Function {
Insn::StringCopy { .. } => types::StringExact,
Insn::StringIntern { .. } => types::Symbol,
Insn::StringConcat { .. } => types::StringExact,
+ Insn::ToRegexp { .. } => types::RegexpExact,
Insn::NewArray { .. } => types::ArrayExact,
Insn::ArrayDup { .. } => types::ArrayExact,
Insn::NewHash { .. } => types::HashExact,
@@ -1939,6 +1974,10 @@ impl Function {
worklist.extend(strings);
worklist.push_back(state);
}
+ &Insn::ToRegexp { ref values, state, .. } => {
+ worklist.extend(values);
+ worklist.push_back(state);
+ }
| &Insn::Return { val }
| &Insn::Throw { val, .. }
| &Insn::Test { val }
@@ -2863,6 +2902,15 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> {
let insn_id = fun.push_insn(block, Insn::StringConcat { strings, state: exit_id });
state.stack_push(insn_id);
}
+ YARVINSN_toregexp => {
+ // First arg contains the options (multiline, extended, ignorecase) used to create the regexp
+ let opt = get_arg(pc, 0).as_usize();
+ let count = get_arg(pc, 1).as_usize();
+ let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state });
+ let values = state.stack_pop_n(count)?;
+ let insn_id = fun.push_insn(block, Insn::ToRegexp { opt, values, state: exit_id });
+ state.stack_push(insn_id);
+ }
YARVINSN_newarray => {
let count = get_arg(pc, 0).as_usize();
let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state });
@@ -5331,6 +5379,47 @@ mod tests {
}
#[test]
+ fn test_toregexp() {
+ eval(r##"
+ def test = /#{1}#{2}#{3}/
+ "##);
+ assert_method_hir_with_opcode("test", YARVINSN_toregexp, expect![[r#"
+ fn test@<compiled>:2:
+ bb0(v0:BasicObject):
+ v2:Fixnum[1] = Const Value(1)
+ v4:BasicObject = ObjToString v2
+ v6:String = AnyToString v2, str: v4
+ v7:Fixnum[2] = Const Value(2)
+ v9:BasicObject = ObjToString v7
+ v11:String = AnyToString v7, str: v9
+ v12:Fixnum[3] = Const Value(3)
+ v14:BasicObject = ObjToString v12
+ v16:String = AnyToString v12, str: v14
+ v18:RegexpExact = ToRegexp v6, v11, v16
+ Return v18
+ "#]]);
+ }
+
+ #[test]
+ fn test_toregexp_with_options() {
+ eval(r##"
+ def test = /#{1}#{2}/mixn
+ "##);
+ assert_method_hir_with_opcode("test", YARVINSN_toregexp, expect![[r#"
+ fn test@<compiled>:2:
+ bb0(v0:BasicObject):
+ v2:Fixnum[1] = Const Value(1)
+ v4:BasicObject = ObjToString v2
+ v6:String = AnyToString v2, str: v4
+ v7:Fixnum[2] = Const Value(2)
+ v9:BasicObject = ObjToString v7
+ v11:String = AnyToString v7, str: v9
+ v13:RegexpExact = ToRegexp v6, v11, MULTILINE|IGNORECASE|EXTENDED|NOENCODING
+ Return v13
+ "#]]);
+ }
+
+ #[test]
fn throw() {
eval("
define_method(:throw_return) { return 1 }