summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- internal/re.h | 5
-rw-r--r-- re.c | 5
-rw-r--r-- test/ruby/test_zjit.rb | 8
-rw-r--r-- zjit/bindgen/src/main.rs | 7
-rw-r--r-- zjit/src/codegen.rs | 41
-rw-r--r-- zjit/src/cruby_bindings.inc.rs | 5
-rw-r--r-- zjit/src/hir.rs | 89
7 files changed, 146 insertions, 14 deletions
diff --git a/internal/re.h b/internal/re.h
index 2788f8b42a..593e5c464f 100644
--- a/internal/re.h
+++ b/internal/re.h
@@ -25,4 +25,9 @@ int rb_match_count(VALUE match);
VALUE rb_reg_new_ary(VALUE ary, int options);
VALUE rb_reg_last_defined(VALUE match);
+#define ARG_REG_OPTION_MASK \
+ (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
+#define ARG_ENCODING_FIXED 16
+#define ARG_ENCODING_NONE 32
+
#endif /* INTERNAL_RE_H */
diff --git a/re.c b/re.c
index 9348622eea..13d7f0ef9e 100644
--- a/re.c
+++ b/re.c
@@ -290,11 +290,6 @@ rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
#define KCODE_FIXED FL_USER4
-#define ARG_REG_OPTION_MASK \
- (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
-#define ARG_ENCODING_FIXED 16
-#define ARG_ENCODING_NONE 32
-
static int
char_to_option(int c)
{
diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb
index 96ac99b6db..e18333a58f 100644
--- a/test/ruby/test_zjit.rb
+++ b/test/ruby/test_zjit.rb
@@ -1847,6 +1847,14 @@ class TestZJIT < Test::Unit::TestCase
}, insns: [:concatstrings]
end
+ def test_regexp_interpolation
+ assert_compiles '/123/', %q{
+ def test = /#{1}#{2}#{3}/
+
+ test
+ }, insns: [:toregexp]
+ end
+
private
# Assert that every method call in `test_script` can be compiled by ZJIT
diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs
index 77299c2657..59b7f9737e 100644
--- a/zjit/bindgen/src/main.rs
+++ b/zjit/bindgen/src/main.rs
@@ -259,6 +259,13 @@ fn main() {
// From internal/re.h
.allowlist_function("rb_reg_new_ary")
+ .allowlist_var("ARG_ENCODING_FIXED")
+ .allowlist_var("ARG_ENCODING_NONE")
+
+ // From include/ruby/onigmo.h
+ .allowlist_var("ONIG_OPTION_IGNORECASE")
+ .allowlist_var("ONIG_OPTION_EXTEND")
+ .allowlist_var("ONIG_OPTION_MULTILINE")
// `ruby_value_type` is a C enum and this stops it from
// prefixing all the members with the name of the type
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index 5780a26357..37f10c92eb 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -346,6 +346,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
Insn::StringConcat { strings, .. } if strings.is_empty() => return None,
Insn::StringConcat { strings, state } => gen_string_concat(jit, asm, opnds!(strings), &function.frame_state(*state)),
Insn::StringIntern { val, state } => gen_intern(asm, opnd!(val), &function.frame_state(*state)),
+ Insn::ToRegexp { opt, values, state } => gen_toregexp(jit, asm, *opt, opnds!(values), &function.frame_state(*state)),
Insn::Param { idx } => unreachable!("block.insns should not have Insn::Param({idx})"),
Insn::Snapshot { .. } => return Some(()), // we don't need to do anything for this instruction at the moment
Insn::Jump(branch) => no_output!(gen_jump(jit, asm, branch)),
@@ -1595,36 +1596,58 @@ pub fn gen_exit_trampoline(cb: &mut CodeBlock) -> Option<CodePtr> {
})
}
-fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec<Opnd>, state: &FrameState) -> Opnd {
- gen_prepare_non_leaf_call(jit, asm, state);
+fn gen_push_opnds(jit: &mut JITState, asm: &mut Assembler, opnds: &[Opnd]) -> lir::Opnd {
+ let n = opnds.len();
// Calculate the compile-time NATIVE_STACK_PTR offset from NATIVE_BASE_PTR
// At this point, frame_setup(&[], jit.c_stack_slots) has been called,
// which allocated aligned_stack_bytes(jit.c_stack_slots) on the stack
let frame_size = aligned_stack_bytes(jit.c_stack_slots);
- let n = strings.len();
let allocation_size = aligned_stack_bytes(n);
- asm_comment!(asm, "allocate {} bytes on C stack for {} strings", allocation_size, n);
+ asm_comment!(asm, "allocate {} bytes on C stack for {} values", allocation_size, n);
asm.sub_into(NATIVE_STACK_PTR, allocation_size.into());
// Calculate the total offset from NATIVE_BASE_PTR to our buffer
let total_offset_from_base = (frame_size + allocation_size) as i32;
- for (idx, &string_opnd) in strings.iter().enumerate() {
+ for (idx, &opnd) in opnds.iter().enumerate() {
let slot_offset = -total_offset_from_base + (idx as i32 * SIZEOF_VALUE_I32);
asm.mov(
Opnd::mem(VALUE_BITS, NATIVE_BASE_PTR, slot_offset),
- string_opnd
+ opnd
);
}
- let first_string_ptr = asm.lea(Opnd::mem(64, NATIVE_BASE_PTR, -total_offset_from_base));
-
- let result = asm_ccall!(asm, rb_str_concat_literals, n.into(), first_string_ptr);
+ asm.lea(Opnd::mem(64, NATIVE_BASE_PTR, -total_offset_from_base))
+}
+fn gen_pop_opnds(asm: &mut Assembler, opnds: &[Opnd]) {
asm_comment!(asm, "restore C stack pointer");
+ let allocation_size = aligned_stack_bytes(opnds.len());
asm.add_into(NATIVE_STACK_PTR, allocation_size.into());
+}
+
+fn gen_toregexp(jit: &mut JITState, asm: &mut Assembler, opt: usize, values: Vec<Opnd>, state: &FrameState) -> Opnd {
+ gen_prepare_non_leaf_call(jit, asm, state);
+
+ let first_opnd_ptr = gen_push_opnds(jit, asm, &values);
+
+ let tmp_ary = asm_ccall!(asm, rb_ary_tmp_new_from_values, Opnd::Imm(0), values.len().into(), first_opnd_ptr);
+ let result = asm_ccall!(asm, rb_reg_new_ary, tmp_ary, opt.into());
+ asm_ccall!(asm, rb_ary_clear, tmp_ary);
+
+ gen_pop_opnds(asm, &values);
+
+ result
+}
+
+fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec<Opnd>, state: &FrameState) -> Opnd {
+ gen_prepare_non_leaf_call(jit, asm, state);
+
+ let first_string_ptr = gen_push_opnds(jit, asm, &strings);
+ let result = asm_ccall!(asm, rb_str_concat_literals, strings.len().into(), first_string_ptr);
+ gen_pop_opnds(asm, &strings);
result
}
diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs
index 5c939fabe7..524b06b580 100644
--- a/zjit/src/cruby_bindings.inc.rs
+++ b/zjit/src/cruby_bindings.inc.rs
@@ -30,6 +30,11 @@ impl<T> ::std::fmt::Debug for __IncompleteArrayField<T> {
fmt.write_str("__IncompleteArrayField")
}
}
+pub const ONIG_OPTION_IGNORECASE: u32 = 1;
+pub const ONIG_OPTION_EXTEND: u32 = 2;
+pub const ONIG_OPTION_MULTILINE: u32 = 4;
+pub const ARG_ENCODING_FIXED: u32 = 16;
+pub const ARG_ENCODING_NONE: u32 = 32;
pub const INTEGER_REDEFINED_OP_FLAG: u32 = 1;
pub const FLOAT_REDEFINED_OP_FLAG: u32 = 2;
pub const STRING_REDEFINED_OP_FLAG: u32 = 4;
diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs
index afe358ec1d..7c7e09663b 100644
--- a/zjit/src/hir.rs
+++ b/zjit/src/hir.rs
@@ -473,6 +473,9 @@ pub enum Insn {
StringIntern { val: InsnId, state: InsnId },
StringConcat { strings: Vec<InsnId>, state: InsnId },
+ /// Combine count stack values into a regexp
+ ToRegexp { opt: usize, values: Vec<InsnId>, state: InsnId },
+
/// Put special object (VMCORE, CBASE, etc.) based on value_type
PutSpecialObject { value_type: SpecialObjectType },
@@ -668,6 +671,14 @@ pub struct InsnPrinter<'a> {
ptr_map: &'a PtrPrintMap,
}
+static REGEXP_FLAGS: &[(u32, &str)] = &[
+ (ONIG_OPTION_MULTILINE, "MULTILINE"),
+ (ONIG_OPTION_IGNORECASE, "IGNORECASE"),
+ (ONIG_OPTION_EXTEND, "EXTENDED"),
+ (ARG_ENCODING_FIXED, "FIXEDENCODING"),
+ (ARG_ENCODING_NONE, "NOENCODING"),
+];
+
impl<'a> std::fmt::Display for InsnPrinter<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match &self.inner {
@@ -716,6 +727,28 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> {
Ok(())
}
+ Insn::ToRegexp { values, opt, .. } => {
+ write!(f, "ToRegexp")?;
+ let mut prefix = " ";
+ for value in values {
+ write!(f, "{prefix}{value}")?;
+ prefix = ", ";
+ }
+
+ let opt = *opt as u32;
+ if opt != 0 {
+ write!(f, ", ")?;
+ let mut sep = "";
+ for (flag, name) in REGEXP_FLAGS {
+ if opt & flag != 0 {
+ write!(f, "{sep}{name}")?;
+ sep = "|";
+ }
+ }
+ }
+
+ Ok(())
+ }
Insn::Test { val } => { write!(f, "Test {val}") }
Insn::IsNil { val } => { write!(f, "IsNil {val}") }
Insn::Jump(target) => { write!(f, "Jump {target}") }
@@ -1179,6 +1212,7 @@ impl Function {
&StringCopy { val, chilled, state } => StringCopy { val: find!(val), chilled, state },
&StringIntern { val, state } => StringIntern { val: find!(val), state: find!(state) },
&StringConcat { ref strings, state } => StringConcat { strings: find_vec!(strings), state: find!(state) },
+ &ToRegexp { opt, ref values, state } => ToRegexp { opt, values: find_vec!(values), state },
&Test { val } => Test { val: find!(val) },
&IsNil { val } => IsNil { val: find!(val) },
&Jump(ref target) => Jump(find_branch_edge!(target)),
@@ -1305,6 +1339,7 @@ impl Function {
Insn::StringCopy { .. } => types::StringExact,
Insn::StringIntern { .. } => types::Symbol,
Insn::StringConcat { .. } => types::StringExact,
+ Insn::ToRegexp { .. } => types::RegexpExact,
Insn::NewArray { .. } => types::ArrayExact,
Insn::ArrayDup { .. } => types::ArrayExact,
Insn::NewHash { .. } => types::HashExact,
@@ -1939,6 +1974,10 @@ impl Function {
worklist.extend(strings);
worklist.push_back(state);
}
+ &Insn::ToRegexp { ref values, state, .. } => {
+ worklist.extend(values);
+ worklist.push_back(state);
+ }
| &Insn::Return { val }
| &Insn::Throw { val, .. }
| &Insn::Test { val }
@@ -2863,6 +2902,15 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> {
let insn_id = fun.push_insn(block, Insn::StringConcat { strings, state: exit_id });
state.stack_push(insn_id);
}
+ YARVINSN_toregexp => {
+ // First arg contains the options (multiline, extended, ignorecase) used to create the regexp
+ let opt = get_arg(pc, 0).as_usize();
+ let count = get_arg(pc, 1).as_usize();
+ let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state });
+ let values = state.stack_pop_n(count)?;
+ let insn_id = fun.push_insn(block, Insn::ToRegexp { opt, values, state: exit_id });
+ state.stack_push(insn_id);
+ }
YARVINSN_newarray => {
let count = get_arg(pc, 0).as_usize();
let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state });
@@ -5331,6 +5379,47 @@ mod tests {
}
#[test]
+ fn test_toregexp() {
+ eval(r##"
+ def test = /#{1}#{2}#{3}/
+ "##);
+ assert_method_hir_with_opcode("test", YARVINSN_toregexp, expect![[r#"
+ fn test@<compiled>:2:
+ bb0(v0:BasicObject):
+ v2:Fixnum[1] = Const Value(1)
+ v4:BasicObject = ObjToString v2
+ v6:String = AnyToString v2, str: v4
+ v7:Fixnum[2] = Const Value(2)
+ v9:BasicObject = ObjToString v7
+ v11:String = AnyToString v7, str: v9
+ v12:Fixnum[3] = Const Value(3)
+ v14:BasicObject = ObjToString v12
+ v16:String = AnyToString v12, str: v14
+ v18:RegexpExact = ToRegexp v6, v11, v16
+ Return v18
+ "#]]);
+ }
+
+ #[test]
+ fn test_toregexp_with_options() {
+ eval(r##"
+ def test = /#{1}#{2}/mixn
+ "##);
+ assert_method_hir_with_opcode("test", YARVINSN_toregexp, expect![[r#"
+ fn test@<compiled>:2:
+ bb0(v0:BasicObject):
+ v2:Fixnum[1] = Const Value(1)
+ v4:BasicObject = ObjToString v2
+ v6:String = AnyToString v2, str: v4
+ v7:Fixnum[2] = Const Value(2)
+ v9:BasicObject = ObjToString v7
+ v11:String = AnyToString v7, str: v9
+ v13:RegexpExact = ToRegexp v6, v11, MULTILINE|IGNORECASE|EXTENDED|NOENCODING
+ Return v13
+ "#]]);
+ }
+
+ #[test]
fn throw() {
eval("
define_method(:throw_return) { return 1 }