Diffstat (limited to 'yjit/src')
51 files changed, 15314 insertions, 6334 deletions
diff --git a/yjit/src/asm/arm64/arg/bitmask_imm.rs b/yjit/src/asm/arm64/arg/bitmask_imm.rs index ff9b2c8a2d..70a439afd5 100644 --- a/yjit/src/asm/arm64/arg/bitmask_imm.rs +++ b/yjit/src/asm/arm64/arg/bitmask_imm.rs @@ -42,7 +42,7 @@ impl TryFrom<u64> for BitmaskImmediate { /// Attempt to convert a u64 into a BitmaskImmediate. /// /// The implementation here is largely based on this blog post: - /// https://dougallj.wordpress.com/2021/10/30/bit-twiddling-optimising-aarch64-logical-immediate-encoding-and-decoding/ + /// <https://dougallj.wordpress.com/2021/10/30/bit-twiddling-optimising-aarch64-logical-immediate-encoding-and-decoding/> fn try_from(value: u64) -> Result<Self, Self::Error> { if value == 0 || value == u64::MAX { return Err(()); @@ -106,7 +106,7 @@ mod tests { #[test] fn test_failures() { - vec![5, 9, 10, 11, 13, 17, 18, 19].iter().for_each(|&imm| { + [5, 9, 10, 11, 13, 17, 18, 19].iter().for_each(|&imm| { assert!(BitmaskImmediate::try_from(imm).is_err()); }); } diff --git a/yjit/src/asm/arm64/arg/condition.rs b/yjit/src/asm/arm64/arg/condition.rs index bb9ce570c3..f711b8b0d8 100644 --- a/yjit/src/asm/arm64/arg/condition.rs +++ b/yjit/src/asm/arm64/arg/condition.rs @@ -49,4 +49,4 @@ impl Condition { } } -}
\ No newline at end of file +} diff --git a/yjit/src/asm/arm64/arg/sys_reg.rs b/yjit/src/asm/arm64/arg/sys_reg.rs index 41d71920cb..6229d5c1fd 100644 --- a/yjit/src/asm/arm64/arg/sys_reg.rs +++ b/yjit/src/asm/arm64/arg/sys_reg.rs @@ -1,6 +1,6 @@ /// The encoded representation of an A64 system register. -/// https://developer.arm.com/documentation/ddi0601/2022-06/AArch64-Registers/ +/// <https://developer.arm.com/documentation/ddi0601/2022-06/AArch64-Registers/> pub enum SystemRegister { - /// https://developer.arm.com/documentation/ddi0601/2022-06/AArch64-Registers/NZCV--Condition-Flags?lang=en + /// <https://developer.arm.com/documentation/ddi0601/2022-06/AArch64-Registers/NZCV--Condition-Flags?lang=en> NZCV = 0b1_011_0100_0010_000 } diff --git a/yjit/src/asm/arm64/inst/atomic.rs b/yjit/src/asm/arm64/inst/atomic.rs index 5ce497209c..dce9affedf 100644 --- a/yjit/src/asm/arm64/inst/atomic.rs +++ b/yjit/src/asm/arm64/inst/atomic.rs @@ -43,13 +43,13 @@ pub struct Atomic { impl Atomic { /// LDADDAL - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDADD--LDADDA--LDADDAL--LDADDL--Atomic-add-on-word-or-doubleword-in-memory-?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDADD--LDADDA--LDADDAL--LDADDL--Atomic-add-on-word-or-doubleword-in-memory-?lang=en> pub fn ldaddal(rs: u8, rt: u8, rn: u8, num_bits: u8) -> Self { Self { rt, rn, rs, size: num_bits.into() } } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en> const FAMILY: u32 = 0b0100; impl From<Atomic> for u32 { diff --git a/yjit/src/asm/arm64/inst/branch.rs b/yjit/src/asm/arm64/inst/branch.rs index f15ef2a9b0..14fcb2e9fd 100644 --- a/yjit/src/asm/arm64/inst/branch.rs +++ b/yjit/src/asm/arm64/inst/branch.rs @@ -28,25 +28,25 @@ pub struct Branch { impl Branch { /// BR - /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/BR--Branch-to-Register-?lang=en + /// <https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/BR--Branch-to-Register-?lang=en> pub fn br(rn: u8) -> Self { Self { rn, op: Op::BR } } /// BLR - /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/BLR--Branch-with-Link-to-Register-?lang=en + /// <https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/BLR--Branch-with-Link-to-Register-?lang=en> pub fn blr(rn: u8) -> Self { Self { rn, op: Op::BLR } } /// RET - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RET--Return-from-subroutine-?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RET--Return-from-subroutine-?lang=en> pub fn ret(rn: u8) -> Self { Self { rn, op: Op::RET } } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en> const FAMILY: u32 = 0b101; impl From<Branch> for u32 { diff --git a/yjit/src/asm/arm64/inst/branch_cond.rs b/yjit/src/asm/arm64/inst/branch_cond.rs index fcc07f69aa..266e9ccb31 100644 --- a/yjit/src/asm/arm64/inst/branch_cond.rs +++ b/yjit/src/asm/arm64/inst/branch_cond.rs @@ -19,13 +19,13 @@ pub struct BranchCond { impl BranchCond { /// B.cond - /// 
https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally- + /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally-> pub fn bcond(cond: u8, offset: InstructionOffset) -> Self { Self { cond, offset } } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en> const FAMILY: u32 = 0b101; impl From<BranchCond> for u32 { diff --git a/yjit/src/asm/arm64/inst/breakpoint.rs b/yjit/src/asm/arm64/inst/breakpoint.rs index be4920ac76..d66a35c4c6 100644 --- a/yjit/src/asm/arm64/inst/breakpoint.rs +++ b/yjit/src/asm/arm64/inst/breakpoint.rs @@ -13,13 +13,13 @@ pub struct Breakpoint { impl Breakpoint { /// BRK - /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/BRK--Breakpoint-instruction- + /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/BRK--Breakpoint-instruction-> pub fn brk(imm16: u16) -> Self { Self { imm16 } } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en#control +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en#control> const FAMILY: u32 = 0b101; impl From<Breakpoint> for u32 { diff --git a/yjit/src/asm/arm64/inst/call.rs b/yjit/src/asm/arm64/inst/call.rs index 74debac7f7..fd26d09f8a 100644 --- a/yjit/src/asm/arm64/inst/call.rs +++ b/yjit/src/asm/arm64/inst/call.rs @@ -29,19 +29,19 @@ pub struct Call { impl Call { /// B - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch- + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch-> pub fn b(offset: InstructionOffset) -> Self { Self { offset, op: Op::Branch } } /// BL - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en> pub fn bl(offset: InstructionOffset) -> Self { Self { offset, op: Op::BranchWithLink } } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en> const FAMILY: u32 = 0b101; impl From<Call> for u32 { diff --git a/yjit/src/asm/arm64/inst/conditional.rs b/yjit/src/asm/arm64/inst/conditional.rs index e1950e95b4..1e26c7408b 100644 --- a/yjit/src/asm/arm64/inst/conditional.rs +++ b/yjit/src/asm/arm64/inst/conditional.rs @@ -28,13 +28,13 @@ pub struct Conditional { impl Conditional { /// CSEL - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CSEL--Conditional-Select-?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CSEL--Conditional-Select-?lang=en> pub fn csel(rd: u8, rn: u8, rm: u8, cond: u8, num_bits: u8) -> Self { Self { rd, rn, cond, rm, sf: num_bits.into() } } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en#condsel +/// 
<https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en#condsel> const FAMILY: u32 = 0b101; impl From<Conditional> for u32 { diff --git a/yjit/src/asm/arm64/inst/data_imm.rs b/yjit/src/asm/arm64/inst/data_imm.rs index b474b00a52..ea71705478 100644 --- a/yjit/src/asm/arm64/inst/data_imm.rs +++ b/yjit/src/asm/arm64/inst/data_imm.rs @@ -44,37 +44,37 @@ pub struct DataImm { impl DataImm { /// ADD (immediate) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--immediate---Add--immediate--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--immediate---Add--immediate--?lang=en> pub fn add(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { Self { rd, rn, imm, s: S::LeaveFlags, op: Op::Add, sf: num_bits.into() } } /// ADDS (immediate, set flags) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--immediate---Add--immediate---setting-flags-?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--immediate---Add--immediate---setting-flags-?lang=en> pub fn adds(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { Self { rd, rn, imm, s: S::UpdateFlags, op: Op::Add, sf: num_bits.into() } } /// CMP (immediate) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--immediate---Compare--immediate---an-alias-of-SUBS--immediate--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--immediate---Compare--immediate---an-alias-of-SUBS--immediate--?lang=en> pub fn cmp(rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { Self::subs(31, rn, imm, num_bits) } /// SUB (immediate) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--immediate---Subtract--immediate--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--immediate---Subtract--immediate--?lang=en> pub fn sub(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { Self { rd, rn, imm, s: S::LeaveFlags, op: Op::Sub, sf: num_bits.into() } } /// SUBS (immediate, set flags) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--immediate---Subtract--immediate---setting-flags-?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--immediate---Subtract--immediate---setting-flags-?lang=en> pub fn subs(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { Self { rd, rn, imm, s: S::UpdateFlags, op: Op::Sub, sf: num_bits.into() } } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en> const FAMILY: u32 = 0b1000; impl From<DataImm> for u32 { diff --git a/yjit/src/asm/arm64/inst/data_reg.rs b/yjit/src/asm/arm64/inst/data_reg.rs index a742121f1f..ed4afa956b 100644 --- a/yjit/src/asm/arm64/inst/data_reg.rs +++ b/yjit/src/asm/arm64/inst/data_reg.rs @@ -57,7 +57,7 @@ pub struct DataReg { impl DataReg { /// ADD (shifted register) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--shifted-register---Add--shifted-register--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--shifted-register---Add--shifted-register--?lang=en> pub fn add(rd: u8, rn: u8, rm: u8, num_bits: u8) 
-> Self { Self { rd, @@ -72,7 +72,7 @@ impl DataReg { } /// ADDS (shifted register, set flags) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--shifted-register---Add--shifted-register---setting-flags-?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--shifted-register---Add--shifted-register---setting-flags-?lang=en> pub fn adds(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { Self { rd, @@ -87,13 +87,13 @@ impl DataReg { } /// CMP (shifted register) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--shifted-register---Compare--shifted-register---an-alias-of-SUBS--shifted-register--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--shifted-register---Compare--shifted-register---an-alias-of-SUBS--shifted-register--?lang=en> pub fn cmp(rn: u8, rm: u8, num_bits: u8) -> Self { Self::subs(31, rn, rm, num_bits) } /// SUB (shifted register) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--shifted-register---Subtract--shifted-register--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--shifted-register---Subtract--shifted-register--?lang=en> pub fn sub(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { Self { rd, @@ -108,7 +108,7 @@ impl DataReg { } /// SUBS (shifted register, set flags) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--shifted-register---Subtract--shifted-register---setting-flags-?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--shifted-register---Subtract--shifted-register---setting-flags-?lang=en> pub fn subs(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { Self { rd, @@ -123,7 +123,7 @@ impl DataReg { } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en> const FAMILY: u32 = 0b0101; impl From<DataReg> for u32 { diff --git a/yjit/src/asm/arm64/inst/halfword_imm.rs b/yjit/src/asm/arm64/inst/halfword_imm.rs index 0ddae8e8de..863ac947dd 100644 --- a/yjit/src/asm/arm64/inst/halfword_imm.rs +++ b/yjit/src/asm/arm64/inst/halfword_imm.rs @@ -53,43 +53,43 @@ pub struct HalfwordImm { impl HalfwordImm { /// LDRH - /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-- + /// <https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--> pub fn ldrh(rt: u8, rn: u8, imm12: i16) -> Self { Self { rt, rn, index: Index::None, imm: imm12, op: Op::Load } } /// LDRH (pre-index) - /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-- + /// <https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--> pub fn ldrh_pre(rt: u8, rn: u8, imm9: i16) -> Self { Self { rt, rn, index: Index::PreIndex, imm: imm9, op: Op::Load } } /// LDRH (post-index) - /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-- + /// <https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--> pub fn ldrh_post(rt: 
u8, rn: u8, imm9: i16) -> Self { Self { rt, rn, index: Index::PostIndex, imm: imm9, op: Op::Load } } /// STRH - /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate-- + /// <https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate--> pub fn strh(rt: u8, rn: u8, imm12: i16) -> Self { Self { rt, rn, index: Index::None, imm: imm12, op: Op::Store } } /// STRH (pre-index) - /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate-- + /// <https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate--> pub fn strh_pre(rt: u8, rn: u8, imm9: i16) -> Self { Self { rt, rn, index: Index::PreIndex, imm: imm9, op: Op::Store } } /// STRH (post-index) - /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate-- + /// <https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate--> pub fn strh_post(rt: u8, rn: u8, imm9: i16) -> Self { Self { rt, rn, index: Index::PostIndex, imm: imm9, op: Op::Store } } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en> const FAMILY: u32 = 0b111100; impl From<HalfwordImm> for u32 { diff --git a/yjit/src/asm/arm64/inst/load_literal.rs b/yjit/src/asm/arm64/inst/load_literal.rs index 3eade205c8..817e893553 100644 --- a/yjit/src/asm/arm64/inst/load_literal.rs +++ b/yjit/src/asm/arm64/inst/load_literal.rs @@ -40,13 +40,13 @@ pub struct LoadLiteral { impl LoadLiteral { /// LDR (load literal) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--literal---Load-Register--literal--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--literal---Load-Register--literal--?lang=en> pub fn ldr_literal(rt: u8, offset: InstructionOffset, num_bits: u8) -> Self { Self { rt, offset, opc: num_bits.into() } } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en> const FAMILY: u32 = 0b0100; impl From<LoadLiteral> for u32 { diff --git a/yjit/src/asm/arm64/inst/load_register.rs b/yjit/src/asm/arm64/inst/load_register.rs index 3426b9ba5f..3d94e8da1f 100644 --- a/yjit/src/asm/arm64/inst/load_register.rs +++ b/yjit/src/asm/arm64/inst/load_register.rs @@ -61,13 +61,13 @@ pub struct LoadRegister { impl LoadRegister { /// LDR - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--register---Load-Register--register--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--register---Load-Register--register--?lang=en> pub fn ldr(rt: u8, rn: u8, rm: u8, num_bits: u8) -> Self { Self { rt, rn, s: S::NoShift, option: Option::LSL, rm, size: num_bits.into() } } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en> const FAMILY: u32 = 0b0100; impl From<LoadRegister> for u32 { diff --git 
a/yjit/src/asm/arm64/inst/load_store.rs b/yjit/src/asm/arm64/inst/load_store.rs index b5c8a3c294..e27909ae35 100644 --- a/yjit/src/asm/arm64/inst/load_store.rs +++ b/yjit/src/asm/arm64/inst/load_store.rs @@ -66,67 +66,67 @@ pub struct LoadStore { impl LoadStore { /// LDR (immediate, post-index) - /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate-- + /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate--> pub fn ldr_post(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { Self { rt, rn, idx: Index::PostIndex, imm9, opc: Opc::LDR, size: num_bits.into() } } /// LDR (immediate, pre-index) - /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate-- + /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate--> pub fn ldr_pre(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { Self { rt, rn, idx: Index::PreIndex, imm9, opc: Opc::LDR, size: num_bits.into() } } /// LDUR (load register, unscaled) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled--?lang=en> pub fn ldur(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDR, size: num_bits.into() } } /// LDURH Load Register Halfword (unscaled) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURH--Load-Register-Halfword--unscaled--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURH--Load-Register-Halfword--unscaled--?lang=en> pub fn ldurh(rt: u8, rn: u8, imm9: i16) -> Self { Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDR, size: Size::Size16 } } /// LDURB (load register, byte, unscaled) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURB--Load-Register-Byte--unscaled--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURB--Load-Register-Byte--unscaled--?lang=en> pub fn ldurb(rt: u8, rn: u8, imm9: i16) -> Self { Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDR, size: Size::Size8 } } /// LDURSW (load register, unscaled, signed) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURSW--Load-Register-Signed-Word--unscaled--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURSW--Load-Register-Signed-Word--unscaled--?lang=en> pub fn ldursw(rt: u8, rn: u8, imm9: i16) -> Self { Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDURSW, size: Size::Size32 } } /// STR (immediate, post-index) - /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STR--immediate---Store-Register--immediate-- + /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STR--immediate---Store-Register--immediate--> pub fn str_post(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { Self { rt, rn, idx: Index::PostIndex, imm9, opc: Opc::STR, size: num_bits.into() } } /// STR (immediate, pre-index) - /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STR--immediate---Store-Register--immediate-- + /// 
<https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STR--immediate---Store-Register--immediate--> pub fn str_pre(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { Self { rt, rn, idx: Index::PreIndex, imm9, opc: Opc::STR, size: num_bits.into() } } /// STUR (store register, unscaled) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STUR--Store-Register--unscaled--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STUR--Store-Register--unscaled--?lang=en> pub fn stur(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { Self { rt, rn, idx: Index::None, imm9, opc: Opc::STR, size: num_bits.into() } } /// STURH (store register, halfword, unscaled) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STURH--Store-Register-Halfword--unscaled--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STURH--Store-Register-Halfword--unscaled--?lang=en> pub fn sturh(rt: u8, rn: u8, imm9: i16) -> Self { Self { rt, rn, idx: Index::None, imm9, opc: Opc::STR, size: Size::Size16 } } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en> const FAMILY: u32 = 0b0100; impl From<LoadStore> for u32 { diff --git a/yjit/src/asm/arm64/inst/load_store_exclusive.rs b/yjit/src/asm/arm64/inst/load_store_exclusive.rs index 8216c2200a..1106b4cb37 100644 --- a/yjit/src/asm/arm64/inst/load_store_exclusive.rs +++ b/yjit/src/asm/arm64/inst/load_store_exclusive.rs @@ -52,19 +52,19 @@ pub struct LoadStoreExclusive { impl LoadStoreExclusive { /// LDAXR - /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/LDAXR--Load-Acquire-Exclusive-Register- + /// <https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/LDAXR--Load-Acquire-Exclusive-Register-> pub fn ldaxr(rt: u8, rn: u8, num_bits: u8) -> Self { Self { rt, rn, rs: 31, op: Op::Load, size: num_bits.into() } } /// STLXR - /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/STLXR--Store-Release-Exclusive-Register- + /// <https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/STLXR--Store-Release-Exclusive-Register-> pub fn stlxr(rs: u8, rt: u8, rn: u8, num_bits: u8) -> Self { Self { rt, rn, rs, op: Op::Store, size: num_bits.into() } } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en> const FAMILY: u32 = 0b0100; impl From<LoadStoreExclusive> for u32 { diff --git a/yjit/src/asm/arm64/inst/logical_imm.rs b/yjit/src/asm/arm64/inst/logical_imm.rs index b24916f8a5..d57ad5f5b7 100644 --- a/yjit/src/asm/arm64/inst/logical_imm.rs +++ b/yjit/src/asm/arm64/inst/logical_imm.rs @@ -44,43 +44,43 @@ pub struct LogicalImm { impl LogicalImm { /// AND (bitmask immediate) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--immediate---Bitwise-AND--immediate--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--immediate---Bitwise-AND--immediate--?lang=en> pub fn and(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { Self { rd, rn, imm, opc: Opc::And, sf: num_bits.into() } } /// ANDS (bitmask immediate) - /// 
https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--immediate---Bitwise-AND--immediate---setting-flags-?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--immediate---Bitwise-AND--immediate---setting-flags-?lang=en> pub fn ands(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { Self { rd, rn, imm, opc: Opc::Ands, sf: num_bits.into() } } /// EOR (bitmask immediate) - /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/EOR--immediate---Bitwise-Exclusive-OR--immediate-- + /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/EOR--immediate---Bitwise-Exclusive-OR--immediate--> pub fn eor(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { Self { rd, rn, imm, opc: Opc::Eor, sf: num_bits.into() } } /// MOV (bitmask immediate) - /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--bitmask-immediate---Move--bitmask-immediate---an-alias-of-ORR--immediate--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--bitmask-immediate---Move--bitmask-immediate---an-alias-of-ORR--immediate--?lang=en> pub fn mov(rd: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { Self { rd, rn: 0b11111, imm, opc: Opc::Orr, sf: num_bits.into() } } /// ORR (bitmask immediate) - /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--immediate---Bitwise-OR--immediate-- + /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--immediate---Bitwise-OR--immediate--> pub fn orr(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { Self { rd, rn, imm, opc: Opc::Orr, sf: num_bits.into() } } /// TST (bitmask immediate) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--immediate---Test-bits--immediate---an-alias-of-ANDS--immediate--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--immediate---Test-bits--immediate---an-alias-of-ANDS--immediate--?lang=en> pub fn tst(rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { Self::ands(31, rn, imm, num_bits) } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#log_imm +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#log_imm> const FAMILY: u32 = 0b1001; impl From<LogicalImm> for u32 { diff --git a/yjit/src/asm/arm64/inst/logical_reg.rs b/yjit/src/asm/arm64/inst/logical_reg.rs index a96805c9f9..18edff606f 100644 --- a/yjit/src/asm/arm64/inst/logical_reg.rs +++ b/yjit/src/asm/arm64/inst/logical_reg.rs @@ -70,55 +70,55 @@ pub struct LogicalReg { impl LogicalReg { /// AND (shifted register) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--shifted-register---Bitwise-AND--shifted-register--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--shifted-register---Bitwise-AND--shifted-register--?lang=en> pub fn and(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::And, sf: num_bits.into() } } /// ANDS (shifted register) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--shifted-register---Bitwise-AND--shifted-register---setting-flags-?lang=en + /// 
<https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--shifted-register---Bitwise-AND--shifted-register---setting-flags-?lang=en> pub fn ands(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } } /// EOR (shifted register) - /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/EOR--shifted-register---Bitwise-Exclusive-OR--shifted-register-- + /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/EOR--shifted-register---Bitwise-Exclusive-OR--shifted-register--> pub fn eor(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Eor, sf: num_bits.into() } } /// MOV (register) - /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register--?lang=en> pub fn mov(rd: u8, rm: u8, num_bits: u8) -> Self { Self { rd, rn: 0b11111, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } } /// MVN (shifted register) - /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MVN--Bitwise-NOT--an-alias-of-ORN--shifted-register--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MVN--Bitwise-NOT--an-alias-of-ORN--shifted-register--?lang=en> pub fn mvn(rd: u8, rm: u8, num_bits: u8) -> Self { Self { rd, rn: 0b11111, imm6: 0, rm, n: N::Yes, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } } /// ORN (shifted register) - /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORN--shifted-register---Bitwise-OR-NOT--shifted-register-- + /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORN--shifted-register---Bitwise-OR-NOT--shifted-register--> pub fn orn(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { Self { rd, rn, imm6: 0, rm, n: N::Yes, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } } /// ORR (shifted register) - /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--shifted-register---Bitwise-OR--shifted-register-- + /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--shifted-register---Bitwise-OR--shifted-register--> pub fn orr(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } } /// TST (shifted register) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--shifted-register---Test--shifted-register---an-alias-of-ANDS--shifted-register--?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--shifted-register---Test--shifted-register---an-alias-of-ANDS--shifted-register--?lang=en> pub fn tst(rn: u8, rm: u8, num_bits: u8) -> Self { Self { rd: 31, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en> const FAMILY: u32 = 0b0101; impl From<LogicalReg> for u32 { diff --git a/yjit/src/asm/arm64/inst/madd.rs 
b/yjit/src/asm/arm64/inst/madd.rs new file mode 100644 index 0000000000..71f2ab230a --- /dev/null +++ b/yjit/src/asm/arm64/inst/madd.rs @@ -0,0 +1,73 @@ +use super::super::arg::Sf; + +/// The struct that represents an A64 multiply-add instruction that can be +/// encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 0 1 1 0 1 1 0 0 0 0 | +/// | sf rm.............. ra.............. rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct MAdd { + /// The number of the general-purpose destination register. + rd: u8, + + /// The number of the first general-purpose source register. + rn: u8, + + /// The number of the third general-purpose source register. + ra: u8, + + /// The number of the second general-purpose source register. + rm: u8, + + /// The size of the registers of this instruction. + sf: Sf +} + +impl MAdd { + /// MUL + /// <https://developer.arm.com/documentation/ddi0602/2023-06/Base-Instructions/MUL--Multiply--an-alias-of-MADD-> + pub fn mul(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, ra: 0b11111, rm, sf: num_bits.into() } + } +} + +impl From<MAdd> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: MAdd) -> Self { + 0 + | ((inst.sf as u32) << 31) + | (0b11011 << 24) + | ((inst.rm as u32) << 16) + | ((inst.ra as u32) << 10) + | ((inst.rn as u32) << 5) + | (inst.rd as u32) + } +} + +impl From<MAdd> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: MAdd) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_mul_32() { + let result: u32 = MAdd::mul(0, 1, 2, 32).into(); + assert_eq!(0x1B027C20, result); + } + + #[test] + fn test_mul_64() { + let result: u32 = MAdd::mul(0, 1, 2, 64).into(); + assert_eq!(0x9B027C20, result); + } +} diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index 9821e6a334..bfffd914ef 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -16,6 +16,8 @@ mod load_store; mod load_store_exclusive; mod logical_imm; mod logical_reg; +mod madd; +mod smulh; mod mov; mod nop; mod pc_rel; @@ -40,6 +42,8 @@ pub use load_store::LoadStore; pub use load_store_exclusive::LoadStoreExclusive; pub use logical_imm::LogicalImm; pub use logical_reg::LogicalReg; +pub use madd::MAdd; +pub use smulh::SMulH; pub use mov::Mov; pub use nop::Nop; pub use pc_rel::PCRelative; diff --git a/yjit/src/asm/arm64/inst/mov.rs b/yjit/src/asm/arm64/inst/mov.rs index e7cb9215b0..eae4565c3a 100644 --- a/yjit/src/asm/arm64/inst/mov.rs +++ b/yjit/src/asm/arm64/inst/mov.rs @@ -56,19 +56,19 @@ pub struct Mov { impl Mov { /// MOVK - /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MOVK--Move-wide-with-keep-?lang=en + /// <https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MOVK--Move-wide-with-keep-?lang=en> pub fn movk(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self { Self { rd, imm16, hw: hw.into(), op: Op::MOVK, sf: num_bits.into() } } /// MOVZ - /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MOVZ--Move-wide-with-zero-?lang=en + /// 
<https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MOVZ--Move-wide-with-zero-?lang=en> pub fn movz(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self { Self { rd, imm16, hw: hw.into(), op: Op::MOVZ, sf: num_bits.into() } } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en> const FAMILY: u32 = 0b1000; impl From<Mov> for u32 { diff --git a/yjit/src/asm/arm64/inst/nop.rs b/yjit/src/asm/arm64/inst/nop.rs index d58b3574a9..081d8558f5 100644 --- a/yjit/src/asm/arm64/inst/nop.rs +++ b/yjit/src/asm/arm64/inst/nop.rs @@ -10,7 +10,7 @@ pub struct Nop; impl Nop { /// NOP - /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/NOP--No-Operation- + /// <https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/NOP--No-Operation-> pub fn nop() -> Self { Self {} } diff --git a/yjit/src/asm/arm64/inst/pc_rel.rs b/yjit/src/asm/arm64/inst/pc_rel.rs index bd1a2b9367..2ea586a778 100644 --- a/yjit/src/asm/arm64/inst/pc_rel.rs +++ b/yjit/src/asm/arm64/inst/pc_rel.rs @@ -30,19 +30,19 @@ pub struct PCRelative { impl PCRelative { /// ADR - /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/ADR--Form-PC-relative-address- + /// <https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/ADR--Form-PC-relative-address-> pub fn adr(rd: u8, imm: i32) -> Self { Self { rd, imm, op: Op::ADR } } /// ADRP - /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/ADRP--Form-PC-relative-address-to-4KB-page- + /// <https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/ADRP--Form-PC-relative-address-to-4KB-page-> pub fn adrp(rd: u8, imm: i32) -> Self { Self { rd, imm: imm >> 12, op: Op::ADRP } } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en> const FAMILY: u32 = 0b1000; impl From<PCRelative> for u32 { diff --git a/yjit/src/asm/arm64/inst/reg_pair.rs b/yjit/src/asm/arm64/inst/reg_pair.rs index 87690e3b4a..9bffcd8479 100644 --- a/yjit/src/asm/arm64/inst/reg_pair.rs +++ b/yjit/src/asm/arm64/inst/reg_pair.rs @@ -68,49 +68,49 @@ impl RegisterPair { } /// LDP (signed offset) - /// LDP <Xt1>, <Xt2>, [<Xn|SP>{, #<imm>}] - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en + /// `LDP <Xt1>, <Xt2>, [<Xn|SP>{, #<imm>}]` + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en> pub fn ldp(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { Self::new(rt1, rt2, rn, disp, Index::LoadSignedOffset, num_bits) } /// LDP (pre-index) - /// LDP <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]! 
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en + /// `LDP <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]!` + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en> pub fn ldp_pre(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { Self::new(rt1, rt2, rn, disp, Index::LoadPreIndex, num_bits) } /// LDP (post-index) - /// LDP <Xt1>, <Xt2>, [<Xn|SP>], #<imm> - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en + /// `LDP <Xt1>, <Xt2>, [<Xn|SP>], #<imm>` + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en> pub fn ldp_post(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { Self::new(rt1, rt2, rn, disp, Index::LoadPostIndex, num_bits) } /// STP (signed offset) - /// STP <Xt1>, <Xt2>, [<Xn|SP>{, #<imm>}] - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en + /// `STP <Xt1>, <Xt2>, [<Xn|SP>{, #<imm>}]` + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en> pub fn stp(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { Self::new(rt1, rt2, rn, disp, Index::StoreSignedOffset, num_bits) } /// STP (pre-index) - /// STP <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]! - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en + /// `STP <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]!` + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en> pub fn stp_pre(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { Self::new(rt1, rt2, rn, disp, Index::StorePreIndex, num_bits) } /// STP (post-index) - /// STP <Xt1>, <Xt2>, [<Xn|SP>], #<imm> - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en + /// `STP <Xt1>, <Xt2>, [<Xn|SP>], #<imm>` + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en> pub fn stp_post(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { Self::new(rt1, rt2, rn, disp, Index::StorePostIndex, num_bits) } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en> const FAMILY: u32 = 0b0100; impl From<RegisterPair> for u32 { diff --git a/yjit/src/asm/arm64/inst/sbfm.rs b/yjit/src/asm/arm64/inst/sbfm.rs index 8602998980..12944ba722 100644 --- a/yjit/src/asm/arm64/inst/sbfm.rs +++ b/yjit/src/asm/arm64/inst/sbfm.rs @@ -32,7 +32,7 @@ pub struct SBFM { impl SBFM { /// ASR - /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ASR--immediate---Arithmetic-Shift-Right--immediate---an-alias-of-SBFM-?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ASR--immediate---Arithmetic-Shift-Right--immediate---an-alias-of-SBFM-?lang=en> pub fn asr(rd: u8, rn: u8, shift: u8, num_bits: u8) -> Self { let (imms, n) = if num_bits == 64 { (0b111111, true) @@ -44,13 +44,13 @@ impl SBFM { } /// SXTW - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SXTW--Sign-Extend-Word--an-alias-of-SBFM-?lang=en + /// 
<https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SXTW--Sign-Extend-Word--an-alias-of-SBFM-?lang=en> pub fn sxtw(rd: u8, rn: u8) -> Self { Self { rd, rn, immr: 0, imms: 31, n: true, sf: Sf::Sf64 } } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#bitfield +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#bitfield> const FAMILY: u32 = 0b1001; impl From<SBFM> for u32 { diff --git a/yjit/src/asm/arm64/inst/shift_imm.rs b/yjit/src/asm/arm64/inst/shift_imm.rs index 3d2685a997..9dac9a1408 100644 --- a/yjit/src/asm/arm64/inst/shift_imm.rs +++ b/yjit/src/asm/arm64/inst/shift_imm.rs @@ -38,13 +38,13 @@ pub struct ShiftImm { impl ShiftImm { /// LSL (immediate) - /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LSL--immediate---Logical-Shift-Left--immediate---an-alias-of-UBFM-?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LSL--immediate---Logical-Shift-Left--immediate---an-alias-of-UBFM-?lang=en> pub fn lsl(rd: u8, rn: u8, shift: u8, num_bits: u8) -> Self { ShiftImm { rd, rn, shift, opc: Opc::LSL, sf: num_bits.into() } } /// LSR (immediate) - /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/LSR--immediate---Logical-Shift-Right--immediate---an-alias-of-UBFM-?lang=en + /// <https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/LSR--immediate---Logical-Shift-Right--immediate---an-alias-of-UBFM-?lang=en> pub fn lsr(rd: u8, rn: u8, shift: u8, num_bits: u8) -> Self { ShiftImm { rd, rn, shift, opc: Opc::LSR, sf: num_bits.into() } } @@ -85,7 +85,7 @@ impl ShiftImm { } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#bitfield +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#bitfield> const FAMILY: u32 = 0b10011; impl From<ShiftImm> for u32 { diff --git a/yjit/src/asm/arm64/inst/smulh.rs b/yjit/src/asm/arm64/inst/smulh.rs new file mode 100644 index 0000000000..f355cb6531 --- /dev/null +++ b/yjit/src/asm/arm64/inst/smulh.rs @@ -0,0 +1,60 @@ +/// The struct that represents an A64 signed multiply high instruction +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 0 1 1 0 1 1 0 1 0 0 | +/// | rm.............. ra.............. rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct SMulH { + /// The number of the general-purpose destination register. + rd: u8, + + /// The number of the first general-purpose source register. + rn: u8, + + /// The number of the third general-purpose source register. + ra: u8, + + /// The number of the second general-purpose source register. + rm: u8, +} + +impl SMulH { + /// SMULH + /// <https://developer.arm.com/documentation/ddi0602/2023-06/Base-Instructions/SMULH--Signed-Multiply-High-> + pub fn smulh(rd: u8, rn: u8, rm: u8) -> Self { + Self { rd, rn, ra: 0b11111, rm } + } +} + +impl From<SMulH> for u32 { + /// Convert an instruction into a 32-bit value. 
+ fn from(inst: SMulH) -> Self { + 0 + | (0b10011011010 << 21) + | ((inst.rm as u32) << 16) + | ((inst.ra as u32) << 10) + | ((inst.rn as u32) << 5) + | (inst.rd as u32) + } +} + +impl From<SMulH> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: SMulH) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_smulh() { + let result: u32 = SMulH::smulh(0, 1, 2).into(); + assert_eq!(0x9b427c20, result); + } +} diff --git a/yjit/src/asm/arm64/inst/sys_reg.rs b/yjit/src/asm/arm64/inst/sys_reg.rs index 108737a870..7191dfbfd9 100644 --- a/yjit/src/asm/arm64/inst/sys_reg.rs +++ b/yjit/src/asm/arm64/inst/sys_reg.rs @@ -32,19 +32,19 @@ pub struct SysReg { impl SysReg { /// MRS (register) - /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MRS--Move-System-Register-?lang=en + /// <https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MRS--Move-System-Register-?lang=en> pub fn mrs(rt: u8, systemreg: SystemRegister) -> Self { SysReg { rt, systemreg, l: L::MRS } } /// MSR (register) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MSR--register---Move-general-purpose-register-to-System-Register-?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MSR--register---Move-general-purpose-register-to-System-Register-?lang=en> pub fn msr(systemreg: SystemRegister, rt: u8) -> Self { SysReg { rt, systemreg, l: L::MSR } } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en#systemmove +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en#systemmove> const FAMILY: u32 = 0b110101010001; impl From<SysReg> for u32 { diff --git a/yjit/src/asm/arm64/inst/test_bit.rs b/yjit/src/asm/arm64/inst/test_bit.rs index c57a05ad2b..f7aeca70fd 100644 --- a/yjit/src/asm/arm64/inst/test_bit.rs +++ b/yjit/src/asm/arm64/inst/test_bit.rs @@ -60,19 +60,19 @@ pub struct TestBit { impl TestBit { /// TBNZ - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TBNZ--Test-bit-and-Branch-if-Nonzero-?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TBNZ--Test-bit-and-Branch-if-Nonzero-?lang=en> pub fn tbnz(rt: u8, bit_num: u8, offset: i16) -> Self { Self { rt, imm14: offset, b40: bit_num & 0b11111, op: Op::TBNZ, b5: bit_num.into() } } /// TBZ - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TBZ--Test-bit-and-Branch-if-Zero-?lang=en + /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TBZ--Test-bit-and-Branch-if-Zero-?lang=en> pub fn tbz(rt: u8, bit_num: u8, offset: i16) -> Self { Self { rt, imm14: offset, b40: bit_num & 0b11111, op: Op::TBZ, b5: bit_num.into() } } } -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en> const FAMILY: u32 = 0b11011; impl From<TestBit> for u32 { diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 9bc697ecfb..18b5270f9d 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -186,7 +186,7 @@ pub fn asr(cb: &mut CodeBlock, 
rd: A64Opnd, rn: A64Opnd, shift: A64Opnd) { SBFM::asr(rd.reg_no, rn.reg_no, shift.try_into().unwrap(), rd.num_bits).into() }, - _ => panic!("Invalid operand combination to asr instruction."), + _ => panic!("Invalid operand combination to asr instruction: asr {:?}, {:?}, {:?}", rd, rn, shift), }; cb.write_bytes(&bytes); @@ -215,6 +215,9 @@ pub const fn bcond_offset_fits_bits(offset: i64) -> bool { imm_fits_bits(offset, 19) } +/// CBZ and CBNZ also have a limit of 19 bits for the branch offset. +pub use bcond_offset_fits_bits as cmp_branch_offset_fits_bits; + /// B.cond - branch to target if condition is true pub fn bcond(cb: &mut CodeBlock, cond: u8, offset: InstructionOffset) { assert!(bcond_offset_fits_bits(offset.into()), "The offset must be 19 bits or less."); @@ -254,7 +257,7 @@ pub fn br(cb: &mut CodeBlock, rn: A64Opnd) { /// BRK - create a breakpoint pub fn brk(cb: &mut CodeBlock, imm16: A64Opnd) { let bytes: [u8; 4] = match imm16 { - A64Opnd::None => Breakpoint::brk(0).into(), + A64Opnd::None => Breakpoint::brk(0xf000).into(), A64Opnd::UImm(imm16) => { assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); Breakpoint::brk(imm16 as u16).into() @@ -276,6 +279,9 @@ pub fn cmp(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { DataReg::cmp(rn.reg_no, rm.reg_no, rn.num_bits).into() }, + (A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => { + DataImm::cmp(rn.reg_no, (imm12 as u64).try_into().unwrap(), rn.num_bits).into() + }, (A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { DataImm::cmp(rn.reg_no, imm12.try_into().unwrap(), rn.num_bits).into() }, @@ -699,6 +705,35 @@ pub fn msr(cb: &mut CodeBlock, systemregister: SystemRegister, rt: A64Opnd) { cb.write_bytes(&bytes); } +/// MUL - multiply two registers, put the result in a third register +pub fn mul(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, "Expected registers to be the same size"); + + MAdd::mul(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to mul instruction") + }; + + cb.write_bytes(&bytes); +} + +/// SMULH - multiply two 64-bit registers to produce a 128-bit result, put the high 64-bits of the result into rd +pub fn smulh(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, "Expected registers to be the same size"); + assert!(rd.num_bits == 64, "smulh only applicable to 64-bit registers"); + + SMulH::smulh(rd.reg_no, rn.reg_no, rm.reg_no).into() + }, + _ => panic!("Invalid operand combination to mul instruction") + }; + + cb.write_bytes(&bytes); +} + /// MVN - move a value in a register to another register, negating it pub fn mvn(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) { let bytes: [u8; 4] = match (rd, rm) { @@ -1064,6 +1099,48 @@ pub fn tst(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { cb.write_bytes(&bytes); } +/// CBZ - branch if a register is zero +pub fn cbz(cb: &mut CodeBlock, rt: A64Opnd, offset: InstructionOffset) { + assert!(imm_fits_bits(offset.into(), 19), "jump offset for cbz must fit in 19 bits"); + let bytes: [u8; 4] = if let A64Opnd::Reg(rt) = rt { + cbz_cbnz(rt.num_bits, false, offset, rt.reg_no) + } else { + panic!("Invalid operand combination to cbz instruction.") + }; + 
+ cb.write_bytes(&bytes); +} + +/// CBNZ - branch if a register is non-zero +pub fn cbnz(cb: &mut CodeBlock, rt: A64Opnd, offset: InstructionOffset) { + assert!(imm_fits_bits(offset.into(), 19), "jump offset for cbz must fit in 19 bits"); + let bytes: [u8; 4] = if let A64Opnd::Reg(rt) = rt { + cbz_cbnz(rt.num_bits, true, offset, rt.reg_no) + } else { + panic!("Invalid operand combination to cbnz instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// Encode Compare and Branch on Zero (CBZ) with `op=0` or Compare and Branch on Nonzero (CBNZ) +/// with `op=1`. +/// +/// <https://developer.arm.com/documentation/ddi0602/2024-03/Base-Instructions/CBZ--Compare-and-Branch-on-Zero-> +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | sf 0 1 1 0 1 0 op | +/// | imm19........................................................... Rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +fn cbz_cbnz(num_bits: u8, op: bool, offset: InstructionOffset, rt: u8) -> [u8; 4] { + ((Sf::from(num_bits) as u32) << 31 | + 0b11010 << 25 | + u32::from(op) << 24 | + truncate_imm::<_, 19>(offset) << 5 | + rt as u32).to_le_bytes() +} + #[cfg(test)] mod tests { use super::*; @@ -1134,7 +1211,7 @@ mod tests { } #[test] - fn test_adds_imm_negatve() { + fn test_adds_imm_negative() { check_bytes("201c00f1", |cb| adds(cb, X0, X1, A64Opnd::new_imm(-7))); } @@ -1159,7 +1236,7 @@ mod tests { } #[test] - fn test_and_32b_immedaite() { + fn test_and_32b_immediate() { check_bytes("404c0012", |cb| and(cb, W0, W2, A64Opnd::new_uimm(0xfffff))); } @@ -1239,8 +1316,26 @@ mod tests { } #[test] + fn test_cbz() { + let offset = InstructionOffset::from_insns(-1); + check_bytes("e0ffffb4e0ffff34", |cb| { + cbz(cb, X0, offset); + cbz(cb, W0, offset); + }); + } + + #[test] + fn test_cbnz() { + let offset = InstructionOffset::from_insns(2); + check_bytes("540000b554000035", |cb| { + cbnz(cb, X20, offset); + cbnz(cb, W20, offset); + }); + } + + #[test] fn test_brk_none() { - check_bytes("000020d4", |cb| brk(cb, A64Opnd::None)); + check_bytes("00003ed4", |cb| brk(cb, A64Opnd::None)); } #[test] @@ -1414,6 +1509,11 @@ mod tests { } #[test] + fn test_mul() { + check_bytes("6a7d0c9b", |cb| mul(cb, X10, X11, X12)); + } + + #[test] fn test_mvn() { check_bytes("ea032baa", |cb| mvn(cb, X10, X11)); } diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index 648041bbab..9ef675b34d 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -1,20 +1,14 @@ -use std::cell::RefCell; use std::fmt; use std::mem; use std::rc::Rc; -#[cfg(target_arch = "x86_64")] -use crate::backend::x86_64::JMP_PTR_BYTES; -#[cfg(target_arch = "aarch64")] -use crate::backend::arm64::JMP_PTR_BYTES; +use std::collections::BTreeMap; + use crate::core::IseqPayload; use crate::core::for_each_off_stack_iseq_payload; use crate::core::for_each_on_stack_iseq_payload; use crate::invariants::rb_yjit_tracing_invalidate_all; +use crate::stats::incr_counter; use crate::virtualmem::WriteError; - -#[cfg(feature = "disasm")] -use std::collections::BTreeMap; - use crate::codegen::CodegenGlobals; use crate::virtualmem::{VirtualMem, CodePtr}; @@ -24,9 +18,6 @@ pub mod x86_64; pub mod arm64; -/// Size of a code page in bytes. Each code page is split into an inlined and an outlined portion. 
-const CODE_PAGE_SIZE: usize = 16 * 1024; - // // TODO: need a field_size_of macro, to compute the size of a struct field in bytes // @@ -52,7 +43,12 @@ pub struct LabelRef { /// Block of memory into which instructions can be assembled pub struct CodeBlock { // Memory for storing the encoded instructions - mem_block: Rc<RefCell<VirtualMem>>, + mem_block: Rc<VirtualMem>, + + // Size of a code page in bytes. Each code page is split into an inlined and an outlined portion. + // Code GC collects code memory at this granularity. + // Must be a multiple of the OS page size. + page_size: usize, // Memory block size mem_size: usize, @@ -60,6 +56,12 @@ pub struct CodeBlock { // Current writing position write_pos: usize, + // The index of the last page with written bytes + last_page_idx: usize, + + // Total number of bytes written to past pages + past_page_bytes: usize, + // Size reserved for writing a jump to the next page page_end_reserve: usize, @@ -72,8 +74,10 @@ pub struct CodeBlock { // References to labels label_refs: Vec<LabelRef>, + // A switch for keeping comments. They take up memory. + keep_comments: bool, + // Comments for assembly instructions, if that feature is enabled - #[cfg(feature = "disasm")] asm_comments: BTreeMap<usize, Vec<String>>, // True for OutlinedCb @@ -83,6 +87,10 @@ pub struct CodeBlock { // for example, when there is not enough space or when a jump // target is too far away. dropped_bytes: bool, + + // Keeps track of what pages we can write to after code gc. + // `None` means all pages are free. + freed_pages: Rc<Option<Vec<usize>>>, } /// Set of CodeBlock label states. Used for recovering the previous state. @@ -93,38 +101,60 @@ pub struct LabelState { } impl CodeBlock { + /// Works for common AArch64 systems that have 16 KiB pages and + /// common x86_64 systems that use 4 KiB pages. + const PREFERRED_CODE_PAGE_SIZE: usize = 16 * 1024; + /// Make a new CodeBlock - pub fn new(mem_block: Rc<RefCell<VirtualMem>>, outlined: bool) -> Self { - let mem_size = mem_block.borrow().virtual_region_size(); + pub fn new(mem_block: Rc<VirtualMem>, outlined: bool, freed_pages: Rc<Option<Vec<usize>>>, keep_comments: bool) -> Self { + // Pick the code page size + let system_page_size = mem_block.system_page_size(); + let page_size = if 0 == Self::PREFERRED_CODE_PAGE_SIZE % system_page_size { + Self::PREFERRED_CODE_PAGE_SIZE + } else { + system_page_size + }; + + let mem_size = mem_block.virtual_region_size(); let mut cb = Self { mem_block, mem_size, + page_size, write_pos: 0, - page_end_reserve: JMP_PTR_BYTES, + last_page_idx: 0, + past_page_bytes: 0, + page_end_reserve: 0, label_addrs: Vec::new(), label_names: Vec::new(), label_refs: Vec::new(), - #[cfg(feature = "disasm")] + keep_comments, asm_comments: BTreeMap::new(), outlined, dropped_bytes: false, + freed_pages, }; + cb.page_end_reserve = cb.jmp_ptr_bytes(); cb.write_pos = cb.page_start(); + + #[cfg(not(test))] + assert_eq!(0, mem_size % page_size, "partially in-bounds code pages should be impossible"); + cb } /// Move the CodeBlock to the next page. If it's on the furthest page, /// move the other CodeBlock to the next page as well. + #[must_use] pub fn next_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, base_ptr: CodePtr, jmp_ptr: F) -> bool { let old_write_ptr = self.get_write_ptr(); self.set_write_ptr(base_ptr); // Use the freed_pages list if code GC has been used. Otherwise use the next page. 
- let next_page_idx = if let Some(freed_pages) = CodegenGlobals::get_freed_pages() { - let current_page = self.write_pos / CODE_PAGE_SIZE; + let next_page_idx = if let Some(freed_pages) = self.freed_pages.as_ref() { + let current_page = self.write_pos / self.page_size; freed_pages.iter().find(|&&page| current_page < page).map(|&page| page) } else { - Some(self.write_pos / CODE_PAGE_SIZE + 1) + Some(self.write_pos / self.page_size + 1) }; // Move self to the next page @@ -133,8 +163,10 @@ impl CodeBlock { return false; } - // Move the other CodeBlock to the same page if it'S on the furthest page - self.other_cb().unwrap().set_page(next_page_idx.unwrap(), &jmp_ptr); + // Move the other CodeBlock to the same page if it's on the furthest page + if cfg!(not(test)) { + self.other_cb().unwrap().set_page(next_page_idx.unwrap(), &jmp_ptr); + } return !self.dropped_bytes; } @@ -161,26 +193,32 @@ impl CodeBlock { // but you need to waste some space for keeping write_pos for every single page. // It doesn't seem necessary for performance either. So we're currently not doing it. let dst_pos = self.get_page_pos(page_idx); - if CODE_PAGE_SIZE * page_idx < self.mem_size && self.write_pos < dst_pos { + if self.write_pos < dst_pos { + // Fail if next page is out of bounds + if dst_pos >= self.mem_size { + return false; + } + // Reset dropped_bytes self.dropped_bytes = false; - // Convert dst_pos to dst_ptr - let src_pos = self.write_pos; - self.write_pos = dst_pos; - let dst_ptr = self.get_write_ptr(); - self.write_pos = src_pos; - self.without_page_end_reserve(|cb| assert!(cb.has_capacity(JMP_PTR_BYTES))); - // Generate jmp_ptr from src_pos to dst_pos + let dst_ptr = self.get_ptr(dst_pos); self.without_page_end_reserve(|cb| { + assert!(cb.has_capacity(cb.jmp_ptr_bytes())); cb.add_comment("jump to next page"); jmp_ptr(cb, dst_ptr); - assert!(!cb.has_dropped_bytes()); }); + // Update past_page_bytes for code_size() if this is a new page + if self.last_page_idx < page_idx { + self.past_page_bytes += self.current_page_bytes(); + } + // Start the next code from dst_pos self.write_pos = dst_pos; + // Update the last_page_idx if page_idx points to the furthest page + self.last_page_idx = usize::max(self.last_page_idx, page_idx); } !self.dropped_bytes } @@ -199,33 +237,39 @@ impl CodeBlock { } // Free the grouped pages at once - let start_ptr = self.mem_block.borrow().start_ptr().add_bytes(page_idx * CODE_PAGE_SIZE); - let batch_size = CODE_PAGE_SIZE * batch_idxs.len(); - self.mem_block.borrow_mut().free_bytes(start_ptr, batch_size as u32); + let start_ptr = self.mem_block.start_ptr().add_bytes(page_idx * self.page_size); + let batch_size = self.page_size * batch_idxs.len(); + self.mem_block.free_bytes(start_ptr, batch_size as u32); } } pub fn page_size(&self) -> usize { - CODE_PAGE_SIZE + self.page_size } pub fn mapped_region_size(&self) -> usize { - self.mem_block.borrow().mapped_region_size() + self.mem_block.mapped_region_size() + } + + /// Size of the region in bytes where writes could be attempted. + #[cfg(target_arch = "aarch64")] + pub fn virtual_region_size(&self) -> usize { + self.mem_block.virtual_region_size() } /// Return the number of code pages that have been mapped by the VirtualMemory. 
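The page-size pick in CodeBlock::new above boils down to one divisibility test against the OS page size. A standalone sketch of just that arithmetic, with an illustrative helper name rather than the real constructor:

const PREFERRED_CODE_PAGE_SIZE: usize = 16 * 1024;

// Mirrors the selection in CodeBlock::new: keep 16 KiB code pages when the system
// page size divides them evenly, otherwise fall back to the system page size.
fn pick_code_page_size(system_page_size: usize) -> usize {
    if PREFERRED_CODE_PAGE_SIZE % system_page_size == 0 {
        PREFERRED_CODE_PAGE_SIZE
    } else {
        system_page_size
    }
}

fn main() {
    assert_eq!(pick_code_page_size(4 * 1024), 16 * 1024);  // common x86_64 systems
    assert_eq!(pick_code_page_size(16 * 1024), 16 * 1024); // common AArch64 systems
    assert_eq!(pick_code_page_size(64 * 1024), 64 * 1024); // 64 KiB-page kernels
}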
pub fn num_mapped_pages(&self) -> usize { // CodeBlock's page size != VirtualMem's page size on Linux, - // so mapped_region_size % CODE_PAGE_SIZE may not be 0 - ((self.mapped_region_size() - 1) / CODE_PAGE_SIZE) + 1 + // so mapped_region_size % self.page_size may not be 0 + ((self.mapped_region_size() - 1) / self.page_size) + 1 } /// Return the number of code pages that have been reserved by the VirtualMemory. pub fn num_virtual_pages(&self) -> usize { - let virtual_region_size = self.mem_block.borrow().virtual_region_size(); + let virtual_region_size = self.mem_block.virtual_region_size(); // CodeBlock's page size != VirtualMem's page size on Linux, - // so mapped_region_size % CODE_PAGE_SIZE may not be 0 - ((virtual_region_size - 1) / CODE_PAGE_SIZE) + 1 + // so mapped_region_size % self.page_size may not be 0 + ((virtual_region_size - 1) / self.page_size) + 1 } /// Return the number of code pages that have been freed and not used yet. @@ -234,18 +278,18 @@ impl CodeBlock { } pub fn has_freed_page(&self, page_idx: usize) -> bool { - CodegenGlobals::get_freed_pages().as_ref().map_or(false, |pages| pages.contains(&page_idx)) && // code GCed - self.write_pos < page_idx * CODE_PAGE_SIZE // and not written yet + self.freed_pages.as_ref().as_ref().map_or(false, |pages| pages.contains(&page_idx)) && // code GCed + self.write_pos < page_idx * self.page_size // and not written yet } /// Convert a page index to the write_pos for the page start. fn get_page_pos(&self, page_idx: usize) -> usize { - CODE_PAGE_SIZE * page_idx + self.page_start() + self.page_size * page_idx + self.page_start() } /// write_pos of the current page start pub fn page_start_pos(&self) -> usize { - self.get_write_pos() / CODE_PAGE_SIZE * CODE_PAGE_SIZE + self.page_start() + self.get_write_pos() / self.page_size * self.page_size + self.page_start() } /// Offset of each page where CodeBlock should start writing @@ -253,12 +297,12 @@ impl CodeBlock { let mut start = if self.inline() { 0 } else { - CODE_PAGE_SIZE / 2 + self.page_size / 2 }; if cfg!(debug_assertions) && !cfg!(test) { // Leave illegal instructions at the beginning of each page to assert // we're not accidentally crossing page boundaries. - start += JMP_PTR_BYTES; + start += self.jmp_ptr_bytes(); } start } @@ -266,9 +310,9 @@ impl CodeBlock { /// Offset of each page where CodeBlock should stop writing (exclusive) pub fn page_end(&self) -> usize { let page_end = if self.inline() { - CODE_PAGE_SIZE / 2 + self.page_size / 2 } else { - CODE_PAGE_SIZE + self.page_size }; page_end - self.page_end_reserve // reserve space to jump to the next page } @@ -282,63 +326,52 @@ impl CodeBlock { } /// Return the address ranges of a given address range that this CodeBlock can write. 
- #[cfg(any(feature = "disasm", target_arch = "aarch64"))] #[allow(dead_code)] pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> { - // CodegenGlobals is not initialized when we write initial ocb code - let freed_pages = if CodegenGlobals::has_instance() { - CodegenGlobals::get_freed_pages().as_ref() - } else { - None - }; - - let region_start = self.get_ptr(0).into_usize(); - let region_end = self.get_ptr(self.get_mem_size()).into_usize(); - let mut start = start_ptr.into_usize(); - let end = std::cmp::min(end_ptr.into_usize(), region_end); + let region_start = self.get_ptr(0).raw_addr(self); + let region_end = self.get_ptr(self.get_mem_size()).raw_addr(self); + let mut start = start_ptr.raw_addr(self); + let end = std::cmp::min(end_ptr.raw_addr(self), region_end); + let freed_pages = self.freed_pages.as_ref().as_ref(); let mut addrs = vec![]; while start < end { - let page_idx = start.saturating_sub(region_start) / CODE_PAGE_SIZE; - let current_page = region_start + (page_idx * CODE_PAGE_SIZE); + let page_idx = start.saturating_sub(region_start) / self.page_size; + let current_page = region_start + (page_idx * self.page_size); let page_end = std::cmp::min(end, current_page + self.page_end()); // If code GC has been used, skip pages that are used by past on-stack code if freed_pages.map_or(true, |pages| pages.contains(&page_idx)) { addrs.push((start, page_end)); } - start = current_page + CODE_PAGE_SIZE + self.page_start(); + start = current_page + self.page_size + self.page_start(); } addrs } - /// Return the code size that has been used by this CodeBlock. + /// Return the number of bytes written by this CodeBlock. pub fn code_size(&self) -> usize { - let mut size = 0; - let current_page_idx = self.write_pos / CODE_PAGE_SIZE; - for page_idx in 0..self.num_mapped_pages() { - if page_idx == current_page_idx { - // Count only actually used bytes for the current page. - size += (self.write_pos % CODE_PAGE_SIZE).saturating_sub(self.page_start()); - } else if !self.has_freed_page(page_idx) { - // Count an entire range for any non-freed pages that have been used. - size += self.page_end() - self.page_start() + self.page_end_reserve; - } - } - size + self.current_page_bytes() + self.past_page_bytes + } + + /// Return the number of bytes written to the current page. + fn current_page_bytes(&self) -> usize { + (self.write_pos % self.page_size).saturating_sub(self.page_start()) } /// Check if this code block has sufficient remaining capacity pub fn has_capacity(&self, num_bytes: usize) -> bool { - let page_offset = self.write_pos % CODE_PAGE_SIZE; + let page_offset = self.write_pos % self.page_size; let capacity = self.page_end().saturating_sub(page_offset); num_bytes <= capacity } /// Add an assembly comment if the feature is on. - /// If not, this becomes an inline no-op. - #[cfg(feature = "disasm")] pub fn add_comment(&mut self, comment: &str) { - let cur_ptr = self.get_write_ptr().into_usize(); + if !self.keep_comments { + return; + } + + let cur_ptr = self.get_write_ptr().raw_addr(self); // If there's no current list of comments for this line number, add one. 
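With the new past_page_bytes field, code_size reduces to simple bookkeeping: bytes written on the current page plus a running total for pages already left behind. The SizeModel type below is an invented toy model of that arithmetic, not a YJIT type, and walks the same scenario as the test_code_size test added later in this diff.

// Toy model of the code_size bookkeeping: page_start is where writing begins on
// each page, and past_page_bytes accumulates when we move to a fresh page.
struct SizeModel { page_size: usize, page_start: usize, write_pos: usize, past_page_bytes: usize }

impl SizeModel {
    fn current_page_bytes(&self) -> usize {
        (self.write_pos % self.page_size).saturating_sub(self.page_start)
    }
    fn code_size(&self) -> usize {
        self.current_page_bytes() + self.past_page_bytes
    }
    fn move_to_next_page(&mut self) {
        self.past_page_bytes += self.current_page_bytes();
        self.write_pos = (self.write_pos / self.page_size + 1) * self.page_size + self.page_start;
    }
}

fn main() {
    let mut m = SizeModel { page_size: 16 * 1024, page_start: 0, write_pos: 0, past_page_bytes: 0 };
    m.write_pos += 4;        // write 4 bytes on the first page
    assert_eq!(m.code_size(), 4);
    m.move_to_next_page();   // jumping pages must not change the total
    assert_eq!(m.code_size(), 4);
    m.write_pos += 4;        // write 4 bytes on the second page
    assert_eq!(m.code_size(), 8);
}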
let this_line_comments = self.asm_comments.entry(cur_ptr).or_default(); @@ -348,28 +381,21 @@ impl CodeBlock { this_line_comments.push(comment.to_string()); } } - #[cfg(not(feature = "disasm"))] - #[inline] - pub fn add_comment(&mut self, _: &str) {} - #[cfg(feature = "disasm")] pub fn comments_at(&self, pos: usize) -> Option<&Vec<String>> { self.asm_comments.get(&pos) } - #[allow(unused_variables)] - #[cfg(feature = "disasm")] pub fn remove_comments(&mut self, start_addr: CodePtr, end_addr: CodePtr) { - for addr in start_addr.into_usize()..end_addr.into_usize() { + if self.asm_comments.is_empty() { + return; + } + for addr in start_addr.raw_addr(self)..end_addr.raw_addr(self) { self.asm_comments.remove(&addr); } } - #[cfg(not(feature = "disasm"))] - #[inline] - pub fn remove_comments(&mut self, _: CodePtr, _: CodePtr) {} pub fn clear_comments(&mut self) { - #[cfg(feature = "disasm")] self.asm_comments.clear(); } @@ -382,7 +408,7 @@ impl CodeBlock { } pub fn write_mem(&self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> { - self.mem_block.borrow_mut().write_byte(write_ptr, byte) + self.mem_block.write_byte(write_ptr, byte) } // Set the current write position @@ -396,31 +422,31 @@ impl CodeBlock { // Set the current write position from a pointer pub fn set_write_ptr(&mut self, code_ptr: CodePtr) { - let pos = code_ptr.into_usize() - self.mem_block.borrow().start_ptr().into_usize(); - self.set_pos(pos); + let pos = code_ptr.as_offset() - self.mem_block.start_ptr().as_offset(); + self.set_pos(pos.try_into().unwrap()); } /// Get a (possibly dangling) direct pointer into the executable memory block pub fn get_ptr(&self, offset: usize) -> CodePtr { - self.mem_block.borrow().start_ptr().add_bytes(offset) + self.mem_block.start_ptr().add_bytes(offset) } /// Convert an address range to memory page indexes against a num_pages()-sized array. - pub fn addrs_to_pages(&self, start_addr: CodePtr, end_addr: CodePtr) -> Vec<usize> { - let mem_start = self.mem_block.borrow().start_ptr().into_usize(); - let mem_end = self.mem_block.borrow().end_ptr().into_usize(); - assert!(mem_start <= start_addr.into_usize()); - assert!(start_addr.into_usize() <= end_addr.into_usize()); - assert!(end_addr.into_usize() <= mem_end); + pub fn addrs_to_pages(&self, start_addr: CodePtr, end_addr: CodePtr) -> impl Iterator<Item = usize> { + let mem_start = self.mem_block.start_ptr().raw_addr(self); + let mem_end = self.mem_block.mapped_end_ptr().raw_addr(self); + assert!(mem_start <= start_addr.raw_addr(self)); + assert!(start_addr.raw_addr(self) <= end_addr.raw_addr(self)); + assert!(end_addr.raw_addr(self) <= mem_end); // Ignore empty code ranges if start_addr == end_addr { - return vec![]; + return 0..0; } - let start_page = (start_addr.into_usize() - mem_start) / CODE_PAGE_SIZE; - let end_page = (end_addr.into_usize() - mem_start - 1) / CODE_PAGE_SIZE; - (start_page..=end_page).collect() // TODO: consider returning an iterator + let start_page = (start_addr.raw_addr(self) - mem_start) / self.page_size; + let end_page = (end_addr.raw_addr(self) - mem_start - 1) / self.page_size; + start_page..end_page + 1 } /// Get a (possibly dangling) direct pointer to the current write position @@ -431,7 +457,7 @@ impl CodeBlock { /// Write a single byte at the current position. 
pub fn write_byte(&mut self, byte: u8) { let write_ptr = self.get_write_ptr(); - if self.has_capacity(1) && self.mem_block.borrow_mut().write_byte(write_ptr, byte).is_ok() { + if self.has_capacity(1) && self.mem_block.write_byte(write_ptr, byte).is_ok() { self.write_pos += 1; } else { self.dropped_bytes = true; @@ -563,14 +589,20 @@ impl CodeBlock { self.label_refs = state.label_refs; } + pub fn mark_all_writeable(&mut self) { + self.mem_block.mark_all_writeable(); + } + pub fn mark_all_executable(&mut self) { - self.mem_block.borrow_mut().mark_all_executable(); + self.mem_block.mark_all_executable(); } /// Code GC. Free code pages that are not on stack and reuse them. - pub fn code_gc(&mut self) { + pub fn code_gc(&mut self, ocb: &mut OutlinedCb) { + assert!(self.inline(), "must use on inline code block"); + // The previous code GC failed to free any pages. Give up. - if CodegenGlobals::get_freed_pages() == &Some(vec![]) { + if self.freed_pages.as_ref() == &Some(vec![]) { return; } @@ -596,11 +628,13 @@ impl CodeBlock { // This currently patches every ISEQ, which works, but in the future, // we could limit that to patch only on-stack ISEQs for optimizing code GC. rb_yjit_tracing_invalidate_all(); - // When code GC runs next time, we could have reused pages in between - // invalidated pages. To invalidate them, we skip freezing them here. - // We free or not reuse the bytes frozen by any past invalidation, so this - // can be safely reset to pass the frozen bytes check on invalidation. - CodegenGlobals::set_inline_frozen_bytes(0); + + // Assert that all code pages are freeable + assert_eq!( + 0, + self.mem_size % self.page_size, + "end of the last code page should be the end of the entire region" + ); // Let VirtuamMem free the pages let mut freed_pages: Vec<usize> = pages_in_use.iter().enumerate() @@ -614,18 +648,21 @@ impl CodeBlock { freed_pages.append(&mut virtual_pages); if let Some(&first_page) = freed_pages.first() { - let mut cb = CodegenGlobals::get_inline_cb(); - cb.write_pos = cb.get_page_pos(first_page); - cb.dropped_bytes = false; - cb.clear_comments(); - - let mut ocb = CodegenGlobals::get_outlined_cb().unwrap(); - ocb.write_pos = ocb.get_page_pos(first_page); - ocb.dropped_bytes = false; - ocb.clear_comments(); + for cb in [&mut *self, ocb.unwrap()] { + cb.write_pos = cb.get_page_pos(first_page); + cb.past_page_bytes = 0; + cb.dropped_bytes = false; + cb.clear_comments(); + } } - CodegenGlobals::set_freed_pages(freed_pages); + // Track which pages are free. + let new_freed_pages = Rc::new(Some(freed_pages)); + let old_freed_pages = mem::replace(&mut self.freed_pages, Rc::clone(&new_freed_pages)); + ocb.unwrap().freed_pages = new_freed_pages; + assert_eq!(1, Rc::strong_count(&old_freed_pages)); // will deallocate + + incr_counter!(code_gc_count); } pub fn inline(&self) -> bool { @@ -653,9 +690,27 @@ impl CodeBlock { let alloc = TestingAllocator::new(mem_size); let mem_start: *const u8 = alloc.mem_start(); - let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size); + let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size, 128 * 1024 * 1024); + + Self::new(Rc::new(virt_mem), false, Rc::new(None), true) + } + + /// Stubbed CodeBlock for testing conditions that can arise due to code GC. Can't execute generated code. 
+ #[cfg(target_arch = "aarch64")] + pub fn new_dummy_with_freed_pages(mut freed_pages: Vec<usize>) -> Self { + use std::ptr::NonNull; + use crate::virtualmem::*; + use crate::virtualmem::tests::TestingAllocator; + + freed_pages.sort_unstable(); + let mem_size = Self::PREFERRED_CODE_PAGE_SIZE * + (1 + freed_pages.last().expect("freed_pages vec should not be empty")); + + let alloc = TestingAllocator::new(mem_size); + let mem_start: *const u8 = alloc.mem_start(); + let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size, 128 * 1024 * 1024); - Self::new(Rc::new(RefCell::new(virt_mem)), false) + Self::new(Rc::new(virt_mem), false, Rc::new(Some(freed_pages)), true) } } @@ -663,13 +718,20 @@ impl CodeBlock { impl fmt::LowerHex for CodeBlock { fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result { for pos in 0..self.write_pos { - let byte = unsafe { self.mem_block.borrow().start_ptr().raw_ptr().add(pos).read() }; + let mem_block = &*self.mem_block; + let byte = unsafe { mem_block.start_ptr().raw_ptr(mem_block).add(pos).read() }; fmtr.write_fmt(format_args!("{:02x}", byte))?; } Ok(()) } } +impl crate::virtualmem::CodePtrBase for CodeBlock { + fn base_ptr(&self) -> std::ptr::NonNull<u8> { + self.mem_block.base_ptr() + } +} + /// Wrapper struct so we can use the type system to distinguish /// Between the inlined and outlined code blocks pub struct OutlinedCb { @@ -756,4 +818,30 @@ mod tests assert_eq!(uimm_num_bits((u32::MAX as u64) + 1), 64); assert_eq!(uimm_num_bits(u64::MAX), 64); } + + #[test] + fn test_code_size() { + // Write 4 bytes in the first page + let mut cb = CodeBlock::new_dummy(CodeBlock::PREFERRED_CODE_PAGE_SIZE * 2); + cb.write_bytes(&[0, 0, 0, 0]); + assert_eq!(cb.code_size(), 4); + + // Moving to the next page should not increase code_size + assert!(cb.next_page(cb.get_write_ptr(), |_, _| {})); + assert_eq!(cb.code_size(), 4); + + // Write 4 bytes in the second page + cb.write_bytes(&[0, 0, 0, 0]); + assert_eq!(cb.code_size(), 8); + + // Rewrite 4 bytes in the first page + let old_write_pos = cb.get_write_pos(); + cb.set_pos(0); + cb.write_bytes(&[1, 1, 1, 1]); + + // Moving from an old page to the next page should not increase code_size + assert!(cb.next_page(cb.get_write_ptr(), |_, _| {})); + cb.set_pos(old_write_pos); + assert_eq!(cb.code_size(), 8); + } } diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs index 67bb5d1ffb..0ef5e92117 100644 --- a/yjit/src/asm/x86_64/mod.rs +++ b/yjit/src/asm/x86_64/mod.rs @@ -362,11 +362,6 @@ pub fn const_ptr_opnd(ptr: *const u8) -> X86Opnd uimm_opnd(ptr as u64) } -pub fn code_ptr_opnd(code_ptr: CodePtr) -> X86Opnd -{ - uimm_opnd(code_ptr.raw_ptr() as u64) -} - /// Write the REX byte fn write_rex(cb: &mut CodeBlock, w_flag: bool, reg_no: u8, idx_reg_no: u8, rm_reg_no: u8) { // 0 1 0 0 w r x b @@ -635,7 +630,7 @@ fn write_rm_multi(cb: &mut CodeBlock, op_mem_reg8: u8, op_mem_reg_pref: u8, op_r panic!("immediate value too large (num_bits={}, num={uimm:?})", num_bits); } }, - _ => unreachable!() + _ => panic!("unknown encoding combo: {opnd0:?} {opnd1:?}") }; } @@ -696,17 +691,17 @@ pub fn call_ptr(cb: &mut CodeBlock, scratch_opnd: X86Opnd, dst_ptr: *const u8) { let end_ptr = cb.get_ptr(cb.write_pos + 5); // Compute the jump offset - let rel64: i64 = dst_ptr as i64 - end_ptr.into_i64(); + let rel64: i64 = dst_ptr as i64 - end_ptr.raw_ptr(cb) as i64; // If the offset fits in 32-bit if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() { - incr_counter!(x86_call_rel32); + 
incr_counter!(num_send_x86_rel32); call_rel32(cb, rel64.try_into().unwrap()); return; } // Move the pointer into the scratch register and call - incr_counter!(x86_call_reg); + incr_counter!(num_send_x86_reg); mov(cb, scratch_opnd, const_ptr_opnd(dst_ptr)); call(cb, scratch_opnd); } else { @@ -805,6 +800,31 @@ pub fn cqo(cb: &mut CodeBlock) { cb.write_bytes(&[0x48, 0x99]); } +/// imul - signed integer multiply +pub fn imul(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) { + assert!(opnd0.num_bits() == 64); + assert!(opnd1.num_bits() == 64); + assert!(matches!(opnd0, X86Opnd::Reg(_) | X86Opnd::Mem(_))); + assert!(matches!(opnd1, X86Opnd::Reg(_) | X86Opnd::Mem(_))); + + match (opnd0, opnd1) { + (X86Opnd::Reg(_), X86Opnd::Reg(_) | X86Opnd::Mem(_)) => { + //REX.W + 0F AF /rIMUL r64, r/m64 + // Quadword register := Quadword register * r/m64. + write_rm(cb, false, true, opnd0, opnd1, None, &[0x0F, 0xAF]); + } + + // Flip the operands to handle this case. This instruction has weird encoding restrictions. + (X86Opnd::Mem(_), X86Opnd::Reg(_)) => { + //REX.W + 0F AF /rIMUL r64, r/m64 + // Quadword register := Quadword register * r/m64. + write_rm(cb, false, true, opnd1, opnd0, None, &[0x0F, 0xAF]); + } + + _ => unreachable!() + } +} + /// Interrupt 3 - trap to debugger pub fn int3(cb: &mut CodeBlock) { cb.write_byte(0xcc); @@ -872,7 +892,7 @@ fn write_jcc_ptr(cb: &mut CodeBlock, op0: u8, op1: u8, dst_ptr: CodePtr) { let end_ptr = cb.get_ptr(cb.write_pos + 4); // Compute the jump offset - let rel64 = dst_ptr.into_i64() - end_ptr.into_i64(); + let rel64 = dst_ptr.as_offset() - end_ptr.as_offset(); if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() { // Write the relative 32-bit jump offset @@ -932,6 +952,7 @@ pub fn jmp32(cb: &mut CodeBlock, offset: i32) { pub fn lea(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { if let X86Opnd::Reg(reg) = dst { assert!(reg.num_bits == 64); + assert!(matches!(src, X86Opnd::Mem(_) | X86Opnd::IPRel(_))); write_rm(cb, false, true, dst, src, None, &[0x8d]); } else { unreachable!(); @@ -1006,7 +1027,10 @@ pub fn mov(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { } let output_num_bits:u32 = if mem.num_bits > 32 { 32 } else { mem.num_bits.into() }; - assert!(imm_num_bits(imm.value) <= (output_num_bits as u8)); + assert!( + mem.num_bits < 64 || imm_num_bits(imm.value) <= (output_num_bits as u8), + "immediate value should be small enough to survive sign extension" + ); cb.write_int(imm.value as u64, output_num_bits); }, // M + UImm @@ -1021,7 +1045,10 @@ pub fn mov(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { } let output_num_bits = if mem.num_bits > 32 { 32 } else { mem.num_bits.into() }; - assert!(imm_num_bits(uimm.value as i64) <= (output_num_bits as u8)); + assert!( + mem.num_bits < 64 || imm_num_bits(uimm.value as i64) <= (output_num_bits as u8), + "immediate value should be small enough to survive sign extension" + ); cb.write_int(uimm.value, output_num_bits); }, // * + Imm/UImm @@ -1223,8 +1250,8 @@ pub fn ret(cb: &mut CodeBlock) { cb.write_byte(0xC3); } -// Encode a single-operand shift instruction -fn write_shift(cb: &mut CodeBlock, op_mem_one_pref: u8, _op_mem_cl_pref: u8, op_mem_imm_pref: u8, op_ext: Option<u8>, opnd0: X86Opnd, opnd1: X86Opnd) { +// Encode a bitwise shift instruction +fn write_shift(cb: &mut CodeBlock, op_mem_one_pref: u8, op_mem_cl_pref: u8, op_mem_imm_pref: u8, op_ext: u8, opnd0: X86Opnd, opnd1: X86Opnd) { assert!(matches!(opnd0, X86Opnd::Reg(_) | X86Opnd::Mem(_))); // Check the size of opnd0 @@ -1234,16 +1261,26 @@ fn 
write_shift(cb: &mut CodeBlock, op_mem_one_pref: u8, _op_mem_cl_pref: u8, op_ let sz_pref = opnd_size == 16; let rex_w = opnd_size == 64; - if let X86Opnd::UImm(imm) = opnd1 { - if imm.value == 1 { - write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext, &[op_mem_one_pref]); - } else { - assert!(imm.num_bits <= 8); - write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext, &[op_mem_imm_pref]); - cb.write_byte(imm.value as u8); + match opnd1 { + X86Opnd::UImm(imm) => { + if imm.value == 1 { + write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, Some(op_ext), &[op_mem_one_pref]); + } else { + assert!(imm.num_bits <= 8); + write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, Some(op_ext), &[op_mem_imm_pref]); + cb.write_byte(imm.value as u8); + } + } + + X86Opnd::Reg(reg) => { + // We can only use CL/RCX as the shift amount + assert!(reg.reg_no == RCX_REG.reg_no); + write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, Some(op_ext), &[op_mem_cl_pref]); + } + + _ => { + unreachable!("unsupported operands: {:?}, {:?}", opnd0, opnd1); } - } else { - unreachable!(); } } @@ -1254,7 +1291,7 @@ pub fn sal(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) { 0xD1, // opMemOnePref, 0xD3, // opMemClPref, 0xC1, // opMemImmPref, - Some(0x04), + 0x04, opnd0, opnd1 ); @@ -1267,7 +1304,7 @@ pub fn sar(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) { 0xD1, // opMemOnePref, 0xD3, // opMemClPref, 0xC1, // opMemImmPref, - Some(0x07), + 0x07, opnd0, opnd1 ); @@ -1280,7 +1317,7 @@ pub fn shl(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) { 0xD1, // opMemOnePref, 0xD3, // opMemClPref, 0xC1, // opMemImmPref, - Some(0x04), + 0x04, opnd0, opnd1 ); @@ -1293,7 +1330,7 @@ pub fn shr(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) { 0xD1, // opMemOnePref, 0xD3, // opMemClPref, 0xC1, // opMemImmPref, - Some(0x05), + 0x05, opnd0, opnd1 ); diff --git a/yjit/src/asm/x86_64/tests.rs b/yjit/src/asm/x86_64/tests.rs index 1cd005747d..eefcbfd52e 100644 --- a/yjit/src/asm/x86_64/tests.rs +++ b/yjit/src/asm/x86_64/tests.rs @@ -68,7 +68,7 @@ fn test_call_ptr() { // calling a lower address check_bytes("e8fbffffff", |cb| { let ptr = cb.get_write_ptr(); - call_ptr(cb, RAX, ptr.raw_ptr()); + call_ptr(cb, RAX, ptr.raw_ptr(cb)); }); } @@ -106,6 +106,15 @@ fn test_cqo() { } #[test] +fn test_imul() { + check_bytes("480fafc3", |cb| imul(cb, RAX, RBX)); + check_bytes("480faf10", |cb| imul(cb, RDX, mem_opnd(64, RAX, 0))); + + // Operands flipped for encoding since multiplication is commutative + check_bytes("480faf10", |cb| imul(cb, mem_opnd(64, RAX, 0), RDX)); +} + +#[test] fn test_jge_label() { check_bytes("0f8dfaffffff", |cb| { let label_idx = cb.new_label("loop".to_owned()); @@ -184,6 +193,7 @@ fn test_mov() { check_bytes("48c7470801000000", |cb| mov(cb, mem_opnd(64, RDI, 8), imm_opnd(1))); //check_bytes("67c7400411000000", |cb| mov(cb, mem_opnd(32, EAX, 4), imm_opnd(0x34))); // We don't distinguish between EAX and RAX here - that's probably fine? 
check_bytes("c7400411000000", |cb| mov(cb, mem_opnd(32, RAX, 4), imm_opnd(17))); + check_bytes("c7400401000080", |cb| mov(cb, mem_opnd(32, RAX, 4), uimm_opnd(0x80000001))); check_bytes("41895814", |cb| mov(cb, mem_opnd(32, R8, 20), EBX)); check_bytes("4d8913", |cb| mov(cb, mem_opnd(64, R11, 0), R10)); check_bytes("48c742f8f4ffffff", |cb| mov(cb, mem_opnd(64, RDX, -8), imm_opnd(-12))); @@ -340,6 +350,7 @@ fn test_sal() { check_bytes("d1e1", |cb| sal(cb, ECX, uimm_opnd(1))); check_bytes("c1e505", |cb| sal(cb, EBP, uimm_opnd(5))); check_bytes("d1642444", |cb| sal(cb, mem_opnd(32, RSP, 68), uimm_opnd(1))); + check_bytes("48d3e1", |cb| sal(cb, RCX, CL)); } #[test] @@ -361,7 +372,7 @@ fn test_sub() { #[test] #[should_panic] fn test_sub_uimm_too_large() { - // This immedaite becomes a different value after + // This immediate becomes a different value after // sign extension, so not safe to encode. check_bytes("ff", |cb| sub(cb, RCX, uimm_opnd(0x8000_0000))); } @@ -432,15 +443,15 @@ fn basic_capstone_usage() -> std::result::Result<(), capstone::Error> { fn block_comments() { let mut cb = super::CodeBlock::new_dummy(4096); - let first_write_ptr = cb.get_write_ptr().into_usize(); + let first_write_ptr = cb.get_write_ptr().raw_addr(&cb); cb.add_comment("Beginning"); xor(&mut cb, EAX, EAX); // 2 bytes long - let second_write_ptr = cb.get_write_ptr().into_usize(); + let second_write_ptr = cb.get_write_ptr().raw_addr(&cb); cb.add_comment("Two bytes in"); cb.add_comment("Still two bytes in"); cb.add_comment("Still two bytes in"); // Duplicate, should be ignored test(&mut cb, mem_opnd(64, RSI, 64), imm_opnd(!0x08)); // 8 bytes long - let third_write_ptr = cb.get_write_ptr().into_usize(); + let third_write_ptr = cb.get_write_ptr().raw_addr(&cb); cb.add_comment("Ten bytes in"); assert_eq!(&vec!( "Beginning".to_string() ), cb.comments_at(first_write_ptr).unwrap()); diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index eb096ce677..0521e09d0b 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -1,13 +1,11 @@ -#![allow(dead_code)] -#![allow(unused_variables)] -#![allow(unused_imports)] +use std::mem::take; -use crate::asm::{CodeBlock}; +use crate::asm::{CodeBlock, OutlinedCb}; use crate::asm::arm64::*; -use crate::codegen::{JITState, CodegenGlobals}; use crate::cruby::*; use crate::backend::ir::*; use crate::virtualmem::CodePtr; +use crate::utils::*; // Use the arm64 register type for this platform pub type Reg = A64Reg; @@ -36,8 +34,25 @@ pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG); pub const C_SP_REG: A64Opnd = X31; pub const C_SP_STEP: i32 = 16; -// The number of bytes that are generated by emit_jmp_ptr -pub const JMP_PTR_BYTES: usize = 20; +impl CodeBlock { + // The maximum number of bytes that can be generated by emit_jmp_ptr. + pub fn jmp_ptr_bytes(&self) -> usize { + // b instruction's offset is encoded as imm26 times 4. It can jump to + // +/-128MiB, so this can be used when --yjit-exec-mem-size <= 128. + let num_insns = if b_offset_fits_bits(self.virtual_region_size() as i64 / 4) { + 1 // b instruction + } else { + 5 // 4 instructions to load a 64-bit absolute address + br instruction + }; + num_insns * 4 + } + + // The maximum number of instructions that can be generated by emit_conditional_jump. + fn conditional_jump_insns(&self) -> i32 { + // The worst case is instructions for a jump + bcond. 
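The 128 MiB bound mentioned in jmp_ptr_bytes above is just the reach of a signed imm26 field scaled by the 4-byte instruction size. A few lines of plain arithmetic make that explicit; no YJIT helpers are involved.

fn main() {
    // imm26 is a signed 26-bit instruction count, so the reach is +/- 2^25 instructions.
    let max_insns: i64 = 1 << 25;
    // Each A64 instruction is 4 bytes, so the byte reach is +/- 128 MiB.
    let max_bytes = max_insns * 4;
    assert_eq!(max_bytes, 128 * 1024 * 1024);
    // Hence a single `b` suffices whenever the whole code region fits in 128 MiB,
    // and the 5-instruction absolute-address-plus-br sequence is the fallback.
    let region: i64 = 64 * 1024 * 1024; // e.g. a 64 MiB --yjit-exec-mem-size
    assert!(region / 4 <= max_insns);
}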
+ self.jmp_ptr_bytes() as i32 / 4 + 1 + } +} /// Map Opnd to A64Opnd impl From<Opnd> for A64Opnd { @@ -52,8 +67,10 @@ impl From<Opnd> for A64Opnd { Opnd::Mem(Mem { base: MemBase::InsnOut(_), .. }) => { panic!("attempted to lower an Opnd::Mem with a MemBase::InsnOut base") }, + Opnd::CArg(_) => panic!("attempted to lower an Opnd::CArg"), Opnd::InsnOut { .. } => panic!("attempted to lower an Opnd::InsnOut"), Opnd::Value(_) => panic!("attempted to lower an Opnd::Value"), + Opnd::Stack { .. } => panic!("attempted to lower an Opnd::Stack"), Opnd::None => panic!( "Attempted to lower an Opnd::None. This often happens when an out operand was not allocated for an instruction because the output of the instruction was not used. Please ensure you are using the output." ), @@ -69,11 +86,112 @@ impl From<&Opnd> for A64Opnd { } } +/// Call emit_jmp_ptr and immediately invalidate the written range. +/// This is needed when next_page also moves other_cb that is not invalidated +/// by compile_with_regs. Doing it here allows you to avoid invalidating a lot +/// more than necessary when other_cb jumps from a position early in the page. +/// This invalidates a small range of cb twice, but we accept the small cost. +fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) { + #[cfg(not(test))] + let start = cb.get_write_ptr(); + emit_jmp_ptr(cb, dst_ptr, true); + #[cfg(not(test))] + { + let end = cb.get_write_ptr(); + unsafe { rb_jit_icache_invalidate(start.raw_ptr(cb) as _, end.raw_ptr(cb) as _) }; + } +} + +fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) { + let src_addr = cb.get_write_ptr().as_offset(); + let dst_addr = dst_ptr.as_offset(); + + // If the offset is short enough, then we'll use the + // branch instruction. Otherwise, we'll move the + // destination into a register and use the branch + // register instruction. + let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) { + b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32)); + 1 + } else { + let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64); + br(cb, Assembler::SCRATCH0); + num_insns + 1 + }; + + if padding { + // Make sure it's always a consistent number of + // instructions in case it gets patched and has to + // use the other branch. + assert!(num_insns * 4 <= cb.jmp_ptr_bytes()); + for _ in num_insns..(cb.jmp_ptr_bytes() / 4) { + nop(cb); + } + } +} + +/// Emit the required instructions to load the given value into the +/// given register. Our goal here is to use as few instructions as +/// possible to get this value into the register. +fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize { + let mut current = value; + + if current <= 0xffff { + // If the value fits into a single movz + // instruction, then we'll use that. + movz(cb, rd, A64Opnd::new_uimm(current), 0); + return 1; + } else if BitmaskImmediate::try_from(current).is_ok() { + // Otherwise, if the immediate can be encoded + // with the special bitmask immediate encoding, + // we'll use that. + mov(cb, rd, A64Opnd::new_uimm(current)); + return 1; + } else { + // Finally we'll fall back to encoding the value + // using movz for the first 16 bits and movk for + // each subsequent set of 16 bits as long we + // they are necessary. + movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0); + let mut num_insns = 1; + + // (We're sure this is necessary since we + // checked if it only fit into movz above). 
+ current >>= 16; + movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16); + num_insns += 1; + + if current > 0xffff { + current >>= 16; + movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32); + num_insns += 1; + } + + if current > 0xffff { + current >>= 16; + movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48); + num_insns += 1; + } + return num_insns; + } +} + +/// List of registers that can be used for stack temps. +/// These are caller-saved registers. +pub static TEMP_REGS: [Reg; 5] = [X1_REG, X9_REG, X10_REG, X14_REG, X15_REG]; + +#[derive(Debug, PartialEq)] +enum EmitError { + RetryOnNextPage, + OutOfMemory, +} + impl Assembler { - // A special scratch register for intermediate processing. + // Special scratch registers for intermediate processing. // This register is caller-saved (so we don't have to save it before using it) - const SCRATCH0: A64Opnd = A64Opnd::Reg(X16_REG); + pub const SCRATCH_REG: Reg = X16_REG; + const SCRATCH0: A64Opnd = A64Opnd::Reg(Assembler::SCRATCH_REG); const SCRATCH1: A64Opnd = A64Opnd::Reg(X17_REG); /// Get the list of registers from which we will allocate on this platform @@ -86,7 +204,7 @@ impl Assembler /// Get a list of all of the caller-saved registers pub fn get_caller_save_regs() -> Vec<Reg> { - vec![X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG] + vec![X1_REG, X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG] } /// Split platform-specific instructions @@ -162,7 +280,7 @@ impl Assembler /// do follow that encoding, and if they don't then we load them first. fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd, dest_num_bits: u8) -> Opnd { match opnd { - Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, + Opnd::Reg(_) | Opnd::CArg(_) | Opnd::InsnOut { .. } | Opnd::Stack { .. } => opnd, Opnd::Mem(_) => split_load_operand(asm, opnd), Opnd::Imm(imm) => { if imm == 0 { @@ -195,9 +313,13 @@ impl Assembler /// a certain size. If they don't then we need to load them first. fn split_shifted_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd { match opnd { - Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, + Opnd::Reg(_) | Opnd::CArg(_) | Opnd::InsnOut { .. } => opnd, Opnd::Mem(_) => split_load_operand(asm, opnd), - Opnd::Imm(_) => asm.load(opnd), + Opnd::Imm(imm) => if ShiftedImmediate::try_from(imm as u64).is_ok() { + opnd + } else { + asm.load(opnd) + } Opnd::UImm(uimm) => { if ShiftedImmediate::try_from(uimm).is_ok() { opnd @@ -205,7 +327,7 @@ impl Assembler asm.load(opnd) } }, - Opnd::None | Opnd::Value(_) => unreachable!() + Opnd::None | Opnd::Value(_) | Opnd::Stack { .. } => unreachable!() } } @@ -258,7 +380,8 @@ impl Assembler } } - let mut asm_local = Assembler::new_with_label_names(std::mem::take(&mut self.label_names)); + let live_ranges: Vec<usize> = take(&mut self.live_ranges); + let mut asm_local = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits), self.num_locals); let asm = &mut asm_local; let mut iterator = self.into_draining_iter(); @@ -280,6 +403,9 @@ impl Assembler *opnd = asm.load(*opnd); } }, + Opnd::Stack { .. } => { + *opnd = asm.lower_stack_opnd(opnd); + } _ => {} }; } @@ -287,11 +413,11 @@ impl Assembler // We are replacing instructions here so we know they are already // being used. It is okay not to use their output here. #[allow(unused_must_use)] - match insn { + match &mut insn { Insn::Add { left, right, .. } => { - match (left, right) { + match (*left, *right) { (Opnd::Reg(_) | Opnd::InsnOut { .. }, Opnd::Reg(_) | Opnd::InsnOut { .. 
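For a rough sense of what the movz/movk path in emit_load_value above costs, the sketch below counts non-zero 16-bit chunks. It is an illustrative model only: the real function also has a bitmask-immediate fast path and always emits the second movk once the single-movz case has been ruled out.

// Approximate instruction count for loading a 64-bit constant: one movz for the
// low 16 bits, plus one movk per additional non-zero 16-bit chunk.
// This deliberately skips the single-instruction bitmask-immediate case.
fn movz_movk_count(value: u64) -> usize {
    1 + (1..4).filter(|i| (value >> (16 * i)) & 0xffff != 0).count()
}

fn main() {
    assert_eq!(movz_movk_count(0xffff), 1);                  // fits in a single movz
    assert_eq!(movz_movk_count(0x0011_2233), 2);             // movz + one movk
    assert_eq!(movz_movk_count(0x1122_3344_5566_7788), 4);   // movz + three movk
}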
}) => { - asm.add(left, right); + asm.add(*left, *right); }, (reg_opnd @ (Opnd::Reg(_) | Opnd::InsnOut { .. }), other_opnd) | (other_opnd, reg_opnd @ (Opnd::Reg(_) | Opnd::InsnOut { .. })) => { @@ -299,24 +425,68 @@ impl Assembler asm.add(reg_opnd, opnd1); }, _ => { - let opnd0 = split_load_operand(asm, left); - let opnd1 = split_shifted_immediate(asm, right); + let opnd0 = split_load_operand(asm, *left); + let opnd1 = split_shifted_immediate(asm, *right); asm.add(opnd0, opnd1); } } }, - Insn::And { left, right, .. } => { - let (opnd0, opnd1) = split_boolean_operands(asm, left, right); - asm.and(opnd0, opnd1); - }, - Insn::Or { left, right, .. } => { - let (opnd0, opnd1) = split_boolean_operands(asm, left, right); - asm.or(opnd0, opnd1); - }, - Insn::Xor { left, right, .. } => { - let (opnd0, opnd1) = split_boolean_operands(asm, left, right); - asm.xor(opnd0, opnd1); - }, + Insn::And { left, right, out } | + Insn::Or { left, right, out } | + Insn::Xor { left, right, out } => { + let (opnd0, opnd1) = split_boolean_operands(asm, *left, *right); + *left = opnd0; + *right = opnd1; + + // Since these instructions are lowered to an instruction that have 2 input + // registers and an output register, look to merge with an `Insn::Mov` that + // follows which puts the output in another register. For example: + // `Add a, b => out` followed by `Mov c, out` becomes `Add a, b => c`. + if let (Opnd::Reg(_), Opnd::Reg(_), Some(Insn::Mov { dest, src })) = (left, right, iterator.peek()) { + if live_ranges[index] == index + 1 { + // Check after potentially lowering a stack operand to a register operand + let lowered_dest = if let Opnd::Stack { .. } = dest { + asm.lower_stack_opnd(dest) + } else { + *dest + }; + if out == src && matches!(lowered_dest, Opnd::Reg(_)) { + *out = lowered_dest; + iterator.map_insn_index(asm); + iterator.next_unmapped(); // Pop merged Insn::Mov + } + } + } + + asm.push_insn(insn); + } + // Lower to Joz and Jonz for generating CBZ/CBNZ for compare-with-0-and-branch. + ref insn @ Insn::Cmp { ref left, right: ref right @ (Opnd::UImm(0) | Opnd::Imm(0)) } | + ref insn @ Insn::Test { ref left, right: ref right @ (Opnd::InsnOut { .. } | Opnd::Reg(_)) } if { + let same_opnd_if_test = if let Insn::Test { .. } = insn { + left == right + } else { + true + }; + + same_opnd_if_test && if let Some( + Insn::Jz(target) | Insn::Je(target) | Insn::Jnz(target) | Insn::Jne(target) + ) = iterator.peek() { + matches!(target, Target::SideExit { .. }) + } else { + false + } + } => { + let reg = split_load_operand(asm, *left); + match iterator.peek() { + Some(Insn::Jz(target) | Insn::Je(target)) => asm.push_insn(Insn::Joz(reg, *target)), + Some(Insn::Jnz(target) | Insn::Jne(target)) => asm.push_insn(Insn::Jonz(reg, *target)), + _ => () + } + + iterator.map_insn_index(asm); + iterator.next_unmapped(); // Pop merged jump instruction + } Insn::CCall { opnds, fptr, .. } => { assert!(opnds.len() <= C_ARG_OPNDS.len()); @@ -330,21 +500,21 @@ impl Assembler // a UImm of 0 along as the argument to the move. let value = match opnd { Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0), - Opnd::Mem(_) => split_memory_address(asm, opnd), - _ => opnd + Opnd::Mem(_) => split_memory_address(asm, *opnd), + _ => *opnd }; - asm.load_into(C_ARG_OPNDS[idx], value); + asm.load_into(Opnd::c_arg(C_ARG_OPNDS[idx]), value); } // Now we push the CCall without any arguments so that it // just performs the call. 
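The Mov-folding peephole described above (rewriting `Add a, b => out; Mov c, out` into `Add a, b => c` when `out` dies at the Mov) can be illustrated on a made-up three-address IR. The enum and helper below are invented for illustration and skip the Opnd::Stack lowering and live-range plumbing of the real backend.

// Toy three-address IR used only to demonstrate the folding rewrite.
#[derive(Clone, Copy, Debug, PartialEq)]
enum Op { Add { a: u8, b: u8, out: u8 }, Mov { dest: u8, src: u8 } }

// If `Add .. => t` is immediately followed by `Mov d, t` and t is dead afterwards,
// emit `Add .. => d` instead and drop the Mov.
fn fold_mov(prog: &[Op], last_use: &[usize]) -> Vec<Op> {
    let mut folded = Vec::new();
    let mut i = 0;
    while i < prog.len() {
        match (prog[i], prog.get(i + 1)) {
            (Op::Add { a, b, out: t }, Some(&Op::Mov { dest, src }))
                if src == t && last_use[t as usize] == i + 1 =>
            {
                folded.push(Op::Add { a, b, out: dest });
                i += 2; // the Mov is consumed
            }
            (insn, _) => { folded.push(insn); i += 1; }
        }
    }
    folded
}

fn main() {
    // Add r1, r2 => r3; Mov r0, r3  becomes  Add r1, r2 => r0
    let prog = [Op::Add { a: 1, b: 2, out: 3 }, Op::Mov { dest: 0, src: 3 }];
    let last_use = [0, 0, 0, 1]; // r3 dies at index 1 (the Mov)
    assert_eq!(fold_mov(&prog, &last_use), vec![Op::Add { a: 1, b: 2, out: 0 }]);
}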
- asm.ccall(fptr, vec![]); + asm.ccall(*fptr, vec![]); }, Insn::Cmp { left, right } => { - let opnd0 = split_load_operand(asm, left); + let opnd0 = split_load_operand(asm, *left); let opnd0 = split_less_than_32_cmp(asm, opnd0); - let split_right = split_shifted_immediate(asm, right); + let split_right = split_shifted_immediate(asm, *right); let opnd1 = match split_right { Opnd::InsnOut { .. } if opnd0.num_bits() != split_right.num_bits() => { split_right.with_num_bits(opnd0.num_bits().unwrap()).unwrap() @@ -364,81 +534,66 @@ impl Assembler // make sure the displacement isn't too large and then // load it into the return register. Opnd::Mem(_) => { - let split = split_memory_address(asm, opnd); + let split = split_memory_address(asm, *opnd); asm.load_into(C_RET_OPND, split); }, // Otherwise we just need to load the value into the // return register. _ => { - asm.load_into(C_RET_OPND, opnd); + asm.load_into(C_RET_OPND, *opnd); } } asm.cret(C_RET_OPND); }, - Insn::CSelZ { truthy, falsy, .. } => { - let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); - asm.csel_z(opnd0, opnd1); - }, - Insn::CSelNZ { truthy, falsy, .. } => { - let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); - asm.csel_nz(opnd0, opnd1); - }, - Insn::CSelE { truthy, falsy, .. } => { - let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); - asm.csel_e(opnd0, opnd1); - }, - Insn::CSelNE { truthy, falsy, .. } => { - let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); - asm.csel_ne(opnd0, opnd1); - }, - Insn::CSelL { truthy, falsy, .. } => { - let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); - asm.csel_l(opnd0, opnd1); - }, - Insn::CSelLE { truthy, falsy, .. } => { - let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); - asm.csel_le(opnd0, opnd1); - }, - Insn::CSelG { truthy, falsy, .. } => { - let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); - asm.csel_g(opnd0, opnd1); - }, - Insn::CSelGE { truthy, falsy, .. } => { - let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); - asm.csel_ge(opnd0, opnd1); + Insn::CSelZ { truthy, falsy, out } | + Insn::CSelNZ { truthy, falsy, out } | + Insn::CSelE { truthy, falsy, out } | + Insn::CSelNE { truthy, falsy, out } | + Insn::CSelL { truthy, falsy, out } | + Insn::CSelLE { truthy, falsy, out } | + Insn::CSelG { truthy, falsy, out } | + Insn::CSelGE { truthy, falsy, out } => { + let (opnd0, opnd1) = split_csel_operands(asm, *truthy, *falsy); + *truthy = opnd0; + *falsy = opnd1; + // Merge `csel` and `mov` into a single `csel` when possible + match iterator.peek() { + Some(Insn::Mov { dest: Opnd::Reg(reg), src }) + if matches!(out, Opnd::InsnOut { .. }) && *out == *src && live_ranges[index] == index + 1 => { + *out = Opnd::Reg(*reg); + asm.push_insn(insn); + iterator.map_insn_index(asm); + iterator.next_unmapped(); // Pop merged Insn::Mov + } + _ => { + asm.push_insn(insn); + } + } }, Insn::IncrCounter { mem, value } => { let counter_addr = match mem { - Opnd::Mem(_) => split_lea_operand(asm, mem), - _ => mem + Opnd::Mem(_) => split_lea_operand(asm, *mem), + _ => *mem }; - asm.incr_counter(counter_addr, value); + asm.incr_counter(counter_addr, *value); }, Insn::JmpOpnd(opnd) => { if let Opnd::Mem(_) = opnd { - let opnd0 = split_load_operand(asm, opnd); + let opnd0 = split_load_operand(asm, *opnd); asm.jmp_opnd(opnd0); } else { - asm.jmp_opnd(opnd); + asm.jmp_opnd(*opnd); } }, - Insn::Load { opnd, .. 
} => { - let value = match opnd { - Opnd::Mem(_) => split_memory_address(asm, opnd), - _ => opnd - }; - - asm.load(value); - }, - Insn::LoadInto { dest, opnd } => { - let value = match opnd { - Opnd::Mem(_) => split_memory_address(asm, opnd), - _ => opnd + Insn::Load { opnd, .. } | + Insn::LoadInto { opnd, .. } => { + *opnd = match opnd { + Opnd::Mem(_) => split_memory_address(asm, *opnd), + _ => *opnd }; - - asm.load_into(dest, value); + asm.push_insn(insn); }, Insn::LoadSExt { opnd, .. } => { match opnd { @@ -449,39 +604,50 @@ impl Assembler Opnd::Reg(Reg { num_bits: 32, .. }) | Opnd::InsnOut { num_bits: 32, .. } | Opnd::Mem(Mem { num_bits: 32, .. }) => { - asm.load_sext(opnd); + asm.load_sext(*opnd); }, _ => { - asm.load(opnd); + asm.load(*opnd); } }; }, Insn::Mov { dest, src } => { - let value: Opnd = match (dest, src) { - // If the first operand is zero, then we can just use - // the zero register. - (Opnd::Mem(_), Opnd::UImm(0) | Opnd::Imm(0)) => Opnd::Reg(XZR_REG), - // If the first operand is a memory operand, we're going - // to transform this into a store instruction, so we'll - // need to load this anyway. - (Opnd::Mem(_), Opnd::UImm(_)) => asm.load(src), - // The value that is being moved must be either a - // register or an immediate that can be encoded as a - // bitmask immediate. Otherwise, we'll need to split the - // move into multiple instructions. - _ => split_bitmask_immediate(asm, src, dest.rm_num_bits()) - }; + match (&dest, &src) { + // If we're attempting to load into a memory operand, then + // we'll switch over to the store instruction. + (Opnd::Mem(_), _) => { + let opnd0 = split_memory_address(asm, *dest); + let value = match *src { + // If the first operand is zero, then we can just use + // the zero register. + Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG), + // If the first operand is a memory operand, we're going + // to transform this into a store instruction, so we'll + // need to load this anyway. + Opnd::UImm(_) => asm.load(*src), + // The value that is being moved must be either a + // register or an immediate that can be encoded as a + // bitmask immediate. Otherwise, we'll need to split the + // move into multiple instructions. + _ => split_bitmask_immediate(asm, *src, dest.rm_num_bits()) + }; - // If we're attempting to load into a memory operand, then - // we'll switch over to the store instruction. Otherwise - // we'll use the normal mov instruction. - match dest { - Opnd::Mem(_) => { - let opnd0 = split_memory_address(asm, dest); asm.store(opnd0, value); }, - Opnd::Reg(_) => { - asm.mov(dest, value); + // If we're loading a memory operand into a register, then + // we'll switch over to the load instruction. + (Opnd::Reg(_), Opnd::Mem(_)) => { + let value = split_memory_address(asm, *src); + asm.load_into(*dest, value); + }, + // Otherwise we'll use the normal mov instruction. + (Opnd::Reg(_), _) => { + let value = match *src { + // Unlike other instructions, we can avoid splitting this case, using movz. + Opnd::UImm(uimm) if uimm <= 0xffff => *src, + _ => split_bitmask_immediate(asm, *src, dest.rm_num_bits()), + }; + asm.mov(*dest, value); }, _ => unreachable!() }; @@ -490,18 +656,26 @@ impl Assembler // The value that is being negated must be in a register, so // if we get anything else we need to load it first. 
let opnd0 = match opnd { - Opnd::Mem(_) => split_load_operand(asm, opnd), - _ => opnd + Opnd::Mem(_) => split_load_operand(asm, *opnd), + _ => *opnd }; asm.not(opnd0); }, - Insn::Store { dest, src } => { - // The displacement for the STUR instruction can't be more - // than 9 bits long. If it's longer, we need to load the - // memory address into a register first. - let opnd0 = split_memory_address(asm, dest); + Insn::LShift { opnd, .. } | + Insn::RShift { opnd, .. } | + Insn::URShift { opnd, .. } => { + // The operand must be in a register, so + // if we get anything else we need to load it first. + let opnd0 = match opnd { + Opnd::Mem(_) => split_load_operand(asm, *opnd), + _ => *opnd + }; + *opnd = opnd0; + asm.push_insn(insn); + }, + Insn::Store { dest, src } => { // The value being stored must be in a register, so if it's // not already one we'll load it first. let opnd1 = match src { @@ -509,26 +683,43 @@ impl Assembler // the zero register. Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG), // Otherwise we'll check if we need to load it first. - _ => split_load_operand(asm, src) + _ => split_load_operand(asm, *src) }; - asm.store(opnd0, opnd1); + match dest { + Opnd::Reg(_) => { + // Store does not support a register as a dest operand. + asm.mov(*dest, opnd1); + } + _ => { + // The displacement for the STUR instruction can't be more + // than 9 bits long. If it's longer, we need to load the + // memory address into a register first. + let opnd0 = split_memory_address(asm, *dest); + asm.store(opnd0, opnd1); + } + } }, Insn::Sub { left, right, .. } => { - let opnd0 = split_load_operand(asm, left); - let opnd1 = split_shifted_immediate(asm, right); + let opnd0 = split_load_operand(asm, *left); + let opnd1 = split_shifted_immediate(asm, *right); asm.sub(opnd0, opnd1); }, + Insn::Mul { left, right, .. } => { + let opnd0 = split_load_operand(asm, *left); + let opnd1 = split_load_operand(asm, *right); + asm.mul(opnd0, opnd1); + }, Insn::Test { left, right } => { // The value being tested must be in a register, so if it's // not already one we'll load it first. - let opnd0 = split_load_operand(asm, left); + let opnd0 = split_load_operand(asm, *left); // The second value must be either a register or an // unsigned immediate that can be encoded as a bitmask // immediate. If it's not one of those, we'll need to load // it first. - let opnd1 = split_bitmask_immediate(asm, right, opnd0.rm_num_bits()); + let opnd1 = split_bitmask_immediate(asm, *right, opnd0.rm_num_bits()); asm.test(opnd0, opnd1); }, _ => { @@ -551,9 +742,8 @@ impl Assembler } /// Emit platform-specific machine code - /// Returns a list of GC offsets - pub fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Vec<u32> - { + /// Returns a list of GC offsets. Can return failure to signal caller to retry. + fn arm64_emit(&mut self, cb: &mut CodeBlock, ocb: &mut Option<&mut OutlinedCb>) -> Result<Vec<u32>, EmitError> { /// Determine how many instructions it will take to represent moving /// this value into a register. Note that the return value of this /// function must correspond to how many instructions are used to @@ -574,59 +764,13 @@ impl Assembler } } - /// Emit the required instructions to load the given value into the - /// given register. Our goal here is to use as few instructions as - /// possible to get this value into the register. 
- fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize { - let mut current = value; - - if current <= 0xffff { - // If the value fits into a single movz - // instruction, then we'll use that. - movz(cb, rd, A64Opnd::new_uimm(current), 0); - return 1; - } else if BitmaskImmediate::try_from(current).is_ok() { - // Otherwise, if the immediate can be encoded - // with the special bitmask immediate encoding, - // we'll use that. - mov(cb, rd, A64Opnd::new_uimm(current)); - return 1; - } else { - // Finally we'll fall back to encoding the value - // using movz for the first 16 bits and movk for - // each subsequent set of 16 bits as long we - // they are necessary. - movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0); - let mut num_insns = 1; - - // (We're sure this is necessary since we - // checked if it only fit into movz above). - current >>= 16; - movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16); - num_insns += 1; - - if current > 0xffff { - current >>= 16; - movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32); - num_insns += 1; - } - - if current > 0xffff { - current >>= 16; - movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48); - num_insns += 1; - } - return num_insns; - } - } - /// Emit a conditional jump instruction to a specific target. This is /// called when lowering any of the conditional jump instructions. fn emit_conditional_jump<const CONDITION: u8>(cb: &mut CodeBlock, target: Target) { match target { Target::CodePtr(dst_ptr) | Target::SideExitPtr(dst_ptr) => { - let dst_addr = dst_ptr.into_i64(); - let src_addr = cb.get_write_ptr().into_i64(); + let dst_addr = dst_ptr.as_offset(); + let src_addr = cb.get_write_ptr().as_offset(); let num_insns = if bcond_offset_fits_bits((dst_addr - src_addr) / 4) { // If the jump offset fits into the conditional jump as @@ -640,10 +784,22 @@ impl Assembler // Here we're going to return 1 because we've only // written out 1 instruction. 1 + } else if b_offset_fits_bits((dst_addr - (src_addr + 4)) / 4) { // + 4 for bcond + // If the jump offset fits into the unconditional jump as + // an immediate value, we can use inverse b.cond + b. + // + // We're going to write out the inverse condition so + // that if it doesn't match it will skip over the + // instruction used for branching. + bcond(cb, Condition::inverse(CONDITION), 2.into()); + b(cb, InstructionOffset::from_bytes((dst_addr - (src_addr + 4)) as i32)); // + 4 for bcond + + // We've only written out 2 instructions. + 2 } else { // Otherwise, we need to load the address into a // register and use the branch register instruction. - let dst_addr = dst_ptr.into_u64(); + let dst_addr = (dst_ptr.raw_ptr(cb) as usize).as_u64(); let load_insns: i32 = emit_load_size(dst_addr).into(); // We're going to write out the inverse condition so @@ -663,7 +819,8 @@ impl Assembler // We need to make sure we have at least 6 instructions for // every kind of jump for invalidation purposes, so we're // going to write out padding nop instructions here. - for _ in num_insns..6 { nop(cb); } + assert!(num_insns <= cb.conditional_jump_insns()); + for _ in num_insns..cb.conditional_jump_insns() { nop(cb); } } }, Target::Label(label_idx) => { @@ -676,62 +833,74 @@ impl Assembler bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes)); }); }, + Target::SideExit { .. } => { + unreachable!("Target::SideExit should have been compiled by compile_side_exit") + }, }; } - /// Emit a push instruction for the given operand by adding to the stack - /// pointer and then storing the given value. 
+ /// Emit a CBZ or CBNZ which branches when a register is zero or non-zero + fn emit_cmp_zero_jump(cb: &mut CodeBlock, reg: A64Opnd, branch_if_zero: bool, target: Target) { + if let Target::SideExitPtr(dst_ptr) = target { + let dst_addr = dst_ptr.as_offset(); + let src_addr = cb.get_write_ptr().as_offset(); + + if cmp_branch_offset_fits_bits((dst_addr - src_addr) / 4) { + // If the offset fits in one instruction, generate cbz or cbnz + let bytes = (dst_addr - src_addr) as i32; + if branch_if_zero { + cbz(cb, reg, InstructionOffset::from_bytes(bytes)); + } else { + cbnz(cb, reg, InstructionOffset::from_bytes(bytes)); + } + } else { + // Otherwise, we load the address into a register and + // use the branch register instruction. Note that because + // side exits should always be close, this form should be + // rare or impossible to see. + let dst_addr = dst_ptr.raw_addr(cb) as u64; + let load_insns: i32 = emit_load_size(dst_addr).into(); + + // Write out the inverse condition so that if + // it doesn't match it will skip over the + // instructions used for branching. + if branch_if_zero { + cbnz(cb, reg, InstructionOffset::from_insns(load_insns + 2)); + } else { + cbz(cb, reg, InstructionOffset::from_insns(load_insns + 2)); + } + emit_load_value(cb, Assembler::SCRATCH0, dst_addr); + br(cb, Assembler::SCRATCH0); + + } + } else { + unreachable!("We should only generate Joz/Jonz with side-exit targets"); + } + } + + /// Push a value to the stack by subtracting from the stack pointer then storing, + /// leaving an 8-byte gap for alignment. fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) { str_pre(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, -C_SP_STEP)); } - /// Emit a pop instruction into the given operand by loading the value - /// and then subtracting from the stack pointer. + /// Pop a value from the stack by loading `[sp]` then adding to the stack pointer. fn emit_pop(cb: &mut CodeBlock, opnd: A64Opnd) { ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP)); } - fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) { - let src_addr = cb.get_write_ptr().into_i64(); - let dst_addr = dst_ptr.into_i64(); - - // If the offset is short enough, then we'll use the - // branch instruction. Otherwise, we'll move the - // destination into a register and use the branch - // register instruction. - let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) { - b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32)); - 1 + /// Compile a side exit if Target::SideExit is given. + fn compile_side_exit( + target: Target, + asm: &mut Assembler, + ocb: &mut Option<&mut OutlinedCb>, + ) -> Result<Target, EmitError> { + if let Target::SideExit { counter, context } = target { + let side_exit = asm.get_side_exit(&context.unwrap(), Some(counter), ocb.as_mut().unwrap()) + .ok_or(EmitError::OutOfMemory)?; + Ok(Target::SideExitPtr(side_exit)) } else { - let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64); - br(cb, Assembler::SCRATCH0); - num_insns + 1 - }; - - if padding { - // Make sure it's always a consistent number of - // instructions in case it gets patched and has to - // use the other branch. - for _ in num_insns..(JMP_PTR_BYTES / 4) { - nop(cb); - } - } - } - - /// Call emit_jmp_ptr and immediately invalidate the written range. - /// This is needed when next_page also moves other_cb that is not invalidated - /// by compile_with_regs. 
Doing it here allows you to avoid invalidating a lot - /// more than necessary when other_cb jumps from a position early in the page. - /// This invalidates a small range of cb twice, but we accept the small cost. - fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) { - #[cfg(not(test))] - let start = cb.get_write_ptr(); - emit_jmp_ptr(cb, dst_ptr, true); - #[cfg(not(test))] - { - let end = cb.get_write_ptr(); - use crate::cruby::rb_yjit_icache_invalidate; - unsafe { rb_yjit_icache_invalidate(start.raw_ptr() as _, end.raw_ptr() as _) }; + Ok(target) } } @@ -740,6 +909,9 @@ impl Assembler // List of GC offsets let mut gc_offsets: Vec<u32> = Vec::new(); + // Buffered list of PosMarker callbacks to fire if codegen is successful + let mut pos_markers: Vec<(usize, CodePtr)> = vec![]; + // For each instruction let start_write_pos = cb.get_write_pos(); let mut insn_idx: usize = 0; @@ -751,16 +923,14 @@ impl Assembler match insn { Insn::Comment(text) => { - if cfg!(feature = "disasm") { - cb.add_comment(text); - } + cb.add_comment(text); }, Insn::Label(target) => { cb.write_label(target.unwrap_label_idx()); }, // Report back the current position in the generated code - Insn::PosMarker(pos_marker) => { - pos_marker(cb.get_write_ptr()); + Insn::PosMarker(..) => { + pos_markers.push((insn_idx, cb.get_write_ptr())) } Insn::BakeString(text) => { for byte in text.as_bytes() { @@ -777,9 +947,6 @@ impl Assembler cb.write_byte(0); } }, - Insn::Add { left, right, out } => { - adds(cb, out.into(), left.into(), right.into()); - }, Insn::FrameSetup => { stp_pre(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, -16)); @@ -792,9 +959,39 @@ impl Assembler ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16)); }, + Insn::Add { left, right, out } => { + adds(cb, out.into(), left.into(), right.into()); + }, Insn::Sub { left, right, out } => { subs(cb, out.into(), left.into(), right.into()); }, + Insn::Mul { left, right, out } => { + // If the next instruction is jo (jump on overflow) + match (self.insns.get(insn_idx + 1), self.insns.get(insn_idx + 2)) { + (Some(Insn::JoMul(_)), _) | + (Some(Insn::PosMarker(_)), Some(Insn::JoMul(_))) => { + // Compute the high 64 bits + smulh(cb, Self::SCRATCH0, left.into(), right.into()); + + // Compute the low 64 bits + // This may clobber one of the input registers, + // so we do it after smulh + mul(cb, out.into(), left.into(), right.into()); + + // Produce a register that is all zeros or all ones + // Based on the sign bit of the 64-bit mul result + asr(cb, Self::SCRATCH1, out.into(), A64Opnd::UImm(63)); + + // If the high 64-bits are not all zeros or all ones, + // matching the sign bit, then we have an overflow + cmp(cb, Self::SCRATCH0, Self::SCRATCH1); + // Insn::JoMul will emit_conditional_jump::<{Condition::NE}> + } + _ => { + mul(cb, out.into(), left.into(), right.into()); + } + } + }, Insn::And { left, right, out } => { and(cb, out.into(), left.into(), right.into()); }, @@ -864,6 +1061,12 @@ impl Assembler let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); insn_gc_offsets.push(ptr_offset); }, + Opnd::CArg { .. } => { + unreachable!("C argument operand was not lowered before arm64_emit"); + } + Opnd::Stack { .. 
} => { + unreachable!("Stack operand was not lowered before arm64_emit"); + } Opnd::None => { unreachable!("Attempted to load from None operand"); } @@ -882,7 +1085,18 @@ impl Assembler }; }, Insn::Mov { dest, src } => { - mov(cb, dest.into(), src.into()); + // This supports the following two kinds of immediates: + // * The value fits into a single movz instruction + // * It can be encoded with the special bitmask immediate encoding + // arm64_split() should have split other immediates that require multiple instructions. + match src { + Opnd::UImm(uimm) if *uimm <= 0xffff => { + movz(cb, dest.into(), A64Opnd::new_uimm(*uimm), 0); + }, + _ => { + mov(cb, dest.into(), src.into()); + } + } }, Insn::Lea { opnd, out } => { let opnd: A64Opnd = opnd.into(); @@ -901,14 +1115,20 @@ impl Assembler } }; }, - Insn::LeaLabel { out, target, .. } => { - let label_idx = target.unwrap_label_idx(); + Insn::LeaJumpTarget { out, target, .. } => { + if let Target::Label(label_idx) = target { + // Set output to the raw address of the label + cb.label_ref(*label_idx, 4, |cb, end_addr, dst_addr| { + adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4))); + }); - cb.label_ref(label_idx, 4, |cb, end_addr, dst_addr| { - adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4))); - }); - - mov(cb, out.into(), Self::SCRATCH0); + mov(cb, out.into(), Self::SCRATCH0); + } else { + // Set output to the jump target's raw address + let target_code = target.unwrap_code_ptr(); + let target_addr = target_code.raw_addr(cb).as_u64(); + emit_load_value(cb, out.into(), target_addr); + } }, Insn::CPush(opnd) => { emit_push(cb, opnd.into()); @@ -934,8 +1154,8 @@ impl Assembler let regs = Assembler::get_caller_save_regs(); // Pop the state/flags register - msr(cb, SystemRegister::NZCV, Self::SCRATCH0); emit_pop(cb, Self::SCRATCH0); + msr(cb, SystemRegister::NZCV, Self::SCRATCH0); for reg in regs.into_iter().rev() { emit_pop(cb, A64Opnd::Reg(reg)); @@ -943,7 +1163,7 @@ impl Assembler }, Insn::CCall { fptr, .. } => { // The offset to the call target in bytes - let src_addr = cb.get_write_ptr().into_i64(); + let src_addr = cb.get_write_ptr().raw_ptr(cb) as i64; let dst_addr = *fptr as i64; // Use BL if the offset is short enough to encode as an immediate. @@ -968,12 +1188,12 @@ impl Assembler br(cb, opnd.into()); }, Insn::Jmp(target) => { - match target { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(dst_ptr) => { - emit_jmp_ptr(cb, *dst_ptr, true); + emit_jmp_ptr(cb, dst_ptr, true); }, Target::SideExitPtr(dst_ptr) => { - emit_jmp_ptr(cb, *dst_ptr, false); + emit_jmp_ptr(cb, dst_ptr, false); }, Target::Label(label_idx) => { // Here we're going to save enough space for @@ -981,27 +1201,45 @@ impl Assembler // instruction once we know the offset. We're going // to assume we can fit into a single b instruction. // It will panic otherwise. - cb.label_ref(*label_idx, 4, |cb, src_addr, dst_addr| { + cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap(); b(cb, InstructionOffset::from_bytes(bytes)); }); }, + Target::SideExit { .. 
} => { + unreachable!("Target::SideExit should have been compiled by compile_side_exit") + }, }; }, Insn::Je(target) | Insn::Jz(target) => { - emit_conditional_jump::<{Condition::EQ}>(cb, *target); + emit_conditional_jump::<{Condition::EQ}>(cb, compile_side_exit(*target, self, ocb)?); }, - Insn::Jne(target) | Insn::Jnz(target) => { - emit_conditional_jump::<{Condition::NE}>(cb, *target); + Insn::Jne(target) | Insn::Jnz(target) | Insn::JoMul(target) => { + emit_conditional_jump::<{Condition::NE}>(cb, compile_side_exit(*target, self, ocb)?); }, Insn::Jl(target) => { - emit_conditional_jump::<{Condition::LT}>(cb, *target); + emit_conditional_jump::<{Condition::LT}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jg(target) => { + emit_conditional_jump::<{Condition::GT}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jge(target) => { + emit_conditional_jump::<{Condition::GE}>(cb, compile_side_exit(*target, self, ocb)?); }, Insn::Jbe(target) => { - emit_conditional_jump::<{Condition::LS}>(cb, *target); + emit_conditional_jump::<{Condition::LS}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jb(target) => { + emit_conditional_jump::<{Condition::CC}>(cb, compile_side_exit(*target, self, ocb)?); }, Insn::Jo(target) => { - emit_conditional_jump::<{Condition::VS}>(cb, *target); + emit_conditional_jump::<{Condition::VS}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Joz(opnd, target) => { + emit_cmp_zero_jump(cb, opnd.into(), true, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jonz(opnd, target) => { + emit_cmp_zero_jump(cb, opnd.into(), false, compile_side_exit(*target, self, ocb)?); }, Insn::IncrCounter { mem, value } => { let label = cb.new_label("incr_counter_loop".to_string()); @@ -1045,7 +1283,7 @@ impl Assembler } Insn::LiveReg { .. } => (), // just a reg alloc signal, no code Insn::PadInvalPatch => { - while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < JMP_PTR_BYTES && !cb.has_dropped_bytes() { + while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < cb.jmp_ptr_bytes() && !cb.has_dropped_bytes() { nop(cb); } } @@ -1055,19 +1293,39 @@ impl Assembler if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, emit_jmp_ptr_with_invalidation) { // Reset cb states before retrying the current Insn cb.set_label_state(old_label_state); + + // We don't want label references to cross page boundaries. Signal caller for + // retry. 
+ if !self.label_names.is_empty() { + return Err(EmitError::RetryOnNextPage); + } } else { insn_idx += 1; gc_offsets.append(&mut insn_gc_offsets); } } - gc_offsets + // Error if we couldn't write out everything + if cb.has_dropped_bytes() { + return Err(EmitError::OutOfMemory) + } else { + // No bytes dropped, so the pos markers point to valid code + for (insn_idx, pos) in pos_markers { + if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() { + callback(pos, &cb); + } else { + panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}"); + } + } + + return Ok(gc_offsets) + } } /// Optimize and compile the stored instructions - pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>) -> Vec<u32> - { - let mut asm = self.arm64_split().alloc_regs(regs); + pub fn compile_with_regs(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>, regs: Vec<Reg>) -> Option<(CodePtr, Vec<u32>)> { + let asm = self.arm64_split(); + let mut asm = asm.alloc_regs(regs); // Create label instances in the code block for (idx, name) in asm.label_names.iter().enumerate() { @@ -1076,11 +1334,24 @@ impl Assembler } let start_ptr = cb.get_write_ptr(); - let gc_offsets = asm.arm64_emit(cb); + let starting_label_state = cb.get_label_state(); + let mut ocb = ocb; // for &mut + let emit_result = match asm.arm64_emit(cb, &mut ocb) { + Err(EmitError::RetryOnNextPage) => { + // we want to lower jumps to labels to b.cond instructions, which have a 1 MiB + // range limit. We can easily exceed the limit in case the jump straddles two pages. + // In this case, we retry with a fresh page once. + cb.set_label_state(starting_label_state); + if cb.next_page(start_ptr, emit_jmp_ptr_with_invalidation) { + asm.arm64_emit(cb, &mut ocb) + } else { + Err(EmitError::OutOfMemory) + } + } + result => result + }; - if cb.has_dropped_bytes() { - cb.clear_labels(); - } else { + if let (Ok(gc_offsets), false) = (emit_result, cb.has_dropped_bytes()) { cb.link_labels(); // Invalidate icache for newly written out region so we don't run stale code. @@ -1089,21 +1360,26 @@ impl Assembler #[cfg(not(test))] cb.without_page_end_reserve(|cb| { for (start, end) in cb.writable_addrs(start_ptr, cb.get_write_ptr()) { - unsafe { rb_yjit_icache_invalidate(start as _, end as _) }; + unsafe { rb_jit_icache_invalidate(start as _, end as _) }; } }); - } - gc_offsets + Some((start_ptr, gc_offsets)) + } else { + cb.clear_labels(); + + None + } } } #[cfg(test)] mod tests { use super::*; + use crate::disasm::*; fn setup_asm() -> (Assembler, CodeBlock) { - (Assembler::new(), CodeBlock::new_dummy(1024)) + (Assembler::new(0), CodeBlock::new_dummy(1024)) } #[test] @@ -1112,7 +1388,7 @@ mod tests { let opnd = asm.add(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); - asm.compile_with_regs(&mut cb, vec![X3_REG]); + asm.compile_with_regs(&mut cb, None, vec![X3_REG]); // Assert that only 2 instructions were written. 
assert_eq!(8, cb.get_write_pos()); @@ -1142,7 +1418,7 @@ mod tests { fn test_emit_cpop_all() { let (mut asm, mut cb) = setup_asm(); - asm.cpop_all(); + asm.cpop_all(crate::core::RegMapping::default()); asm.compile_with_num_regs(&mut cb, 0); } @@ -1159,8 +1435,7 @@ mod tests { fn test_emit_je_fits_into_bcond() { let (mut asm, mut cb) = setup_asm(); - let offset = 80; - let target: CodePtr = ((cb.get_write_ptr().into_u64() + offset) as *mut u8).into(); + let target: CodePtr = cb.get_write_ptr().add_bytes(80); asm.je(Target::CodePtr(target)); asm.compile_with_num_regs(&mut cb, 0); @@ -1171,7 +1446,7 @@ mod tests { let (mut asm, mut cb) = setup_asm(); let offset = 1 << 21; - let target: CodePtr = ((cb.get_write_ptr().into_u64() + offset) as *mut u8).into(); + let target: CodePtr = cb.get_write_ptr().add_bytes(offset); asm.je(Target::CodePtr(target)); asm.compile_with_num_regs(&mut cb, 0); @@ -1182,7 +1457,7 @@ mod tests { let (mut asm, mut cb) = setup_asm(); let label = asm.new_label("label"); - let opnd = asm.lea_label(label); + let opnd = asm.lea_jump_target(label); asm.write_label(label); asm.bake_string("Hello, world!"); @@ -1401,6 +1676,47 @@ mod tests { } #[test] + fn test_bcond_straddling_code_pages() { + const LANDING_PAGE: usize = 65; + let mut asm = Assembler::new(0); + let mut cb = CodeBlock::new_dummy_with_freed_pages(vec![0, LANDING_PAGE]); + + // Skip to near the end of the page. Room for two instructions. + cb.set_pos(cb.page_start_pos() + cb.page_end() - 8); + + let end = asm.new_label("end"); + // Start with a conditional jump... + asm.jz(end); + + // A few instructions, enough to cause a page switch. + let sum = asm.add(399.into(), 111.into()); + let xorred = asm.xor(sum, 859.into()); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), xorred); + asm.store(Opnd::mem(64, Opnd::Reg(X0_REG), 0), xorred); + + // The branch target. It should be in the landing page. + asm.write_label(end); + asm.cret(xorred); + + // [Bug #19385] + // This used to panic with "The offset must be 19 bits or less." + // due to attempting to lower the `asm.jz` above to a `b.e` with an offset that's > 1 MiB. 
+ let starting_pos = cb.get_write_pos(); + asm.compile_with_num_regs(&mut cb, 2); + let gap = cb.get_write_pos() - starting_pos; + assert!(gap > 0b1111111111111111111); + + let instruction_at_starting_pos: [u8; 4] = unsafe { + std::slice::from_raw_parts(cb.get_ptr(starting_pos).raw_ptr(&cb), 4) + }.try_into().unwrap(); + assert_eq!( + 0b000101 << 26_u32, + u32::from_le_bytes(instruction_at_starting_pos) & (0b111111 << 26_u32), + "starting instruction should be an unconditional branch to the new page (B)" + ); + } + + #[test] fn test_emit_xor() { let (mut asm, mut cb) = setup_asm(); @@ -1408,6 +1724,11 @@ mod tests { asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "0b0001ca4b0000f8", " + 0x0: eor x11, x0, x1 + 0x4: stur x11, [x2] + "); } #[test] @@ -1433,4 +1754,76 @@ mod tests { )), } } + + #[test] + fn test_replace_mov_with_ldur() { + let (mut asm, mut cb) = setup_asm(); + + asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::mem(64, CFP, 8)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "618240f8", {" + 0x0: ldur x1, [x19, #8] + "}); + } + + #[test] + fn test_not_split_mov() { + let (mut asm, mut cb) = setup_asm(); + + asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::UImm(0xffff)); + asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::UImm(0x10000)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "e1ff9fd2e10370b2", {" + 0x0: mov x1, #0xffff + 0x4: orr x1, xzr, #0x10000 + "}); + } + + #[test] + fn test_merge_csel_mov() { + let (mut asm, mut cb) = setup_asm(); + + let out = asm.csel_l(Qtrue.into(), Qfalse.into()); + asm.mov(Opnd::Reg(TEMP_REGS[0]), out); + asm.compile_with_num_regs(&mut cb, 2); + + assert_disasm!(cb, "8b0280d20c0080d261b18c9a", {" + 0x0: mov x11, #0x14 + 0x4: mov x12, #0 + 0x8: csel x1, x11, x12, lt + "}); + } + + #[test] + fn test_add_with_immediate() { + let (mut asm, mut cb) = setup_asm(); + + let out = asm.add(Opnd::Reg(TEMP_REGS[1]), 1.into()); + let out = asm.add(out, 1_usize.into()); + asm.mov(Opnd::Reg(TEMP_REGS[0]), out); + asm.compile_with_num_regs(&mut cb, 2); + + assert_disasm!(cb, "2b0500b16b0500b1e1030baa", {" + 0x0: adds x11, x9, #1 + 0x4: adds x11, x11, #1 + 0x8: mov x1, x11 + "}); + } + + #[test] + fn test_mul_with_immediate() { + let (mut asm, mut cb) = setup_asm(); + + let out = asm.mul(Opnd::Reg(TEMP_REGS[1]), 3.into()); + asm.mov(Opnd::Reg(TEMP_REGS[0]), out); + asm.compile_with_num_regs(&mut cb, 2); + + assert_disasm!(cb, "6b0080d22b7d0b9be1030baa", {" + 0x0: mov x11, #3 + 0x4: mul x11, x9, x11 + 0x8: mov x1, x11 + "}); + } } diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index c97276de9b..3fb67bc7cc 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -1,23 +1,16 @@ -#![allow(dead_code)] -#![allow(unused_variables)] -#![allow(unused_imports)] - -use std::cell::Cell; +use std::collections::HashMap; use std::fmt; use std::convert::From; -use std::io::Write; use std::mem::take; -use crate::cruby::{VALUE}; -use crate::virtualmem::{CodePtr}; -use crate::asm::{CodeBlock, uimm_num_bits, imm_num_bits}; -use crate::core::{Context, Type, TempMapping}; +use crate::codegen::{gen_counted_exit, gen_outlined_exit}; +use crate::cruby::{vm_stack_canary, SIZEOF_VALUE_I32, VALUE, VM_ENV_DATA_SIZE}; +use crate::virtualmem::CodePtr; +use crate::asm::{CodeBlock, OutlinedCb}; +use crate::core::{Context, RegMapping, RegOpnd, MAX_CTX_TEMPS}; use crate::options::*; +use crate::stats::*; -#[cfg(target_arch = "x86_64")] -use crate::backend::x86_64::*; - -#[cfg(target_arch = 
"aarch64")] -use crate::backend::arm64::*; +use crate::backend::current::*; pub const EC: Opnd = _EC; pub const CFP: Opnd = _CFP; @@ -25,6 +18,7 @@ pub const SP: Opnd = _SP; pub const C_ARG_OPNDS: [Opnd; 6] = _C_ARG_OPNDS; pub const C_RET_OPND: Opnd = _C_RET_OPND; +pub use crate::backend::current::{Reg, C_RET_REG}; // Memory operand base #[derive(Clone, Copy, PartialEq, Eq, Debug)] @@ -69,9 +63,28 @@ pub enum Opnd // Immediate Ruby value, may be GC'd, movable Value(VALUE), + /// C argument register. The alloc_regs resolves its register dependencies. + CArg(Reg), + // Output of a preceding instruction in this block InsnOut{ idx: usize, num_bits: u8 }, + /// Pointer to a slot on the VM stack + Stack { + /// Index from stack top. Used for conversion to StackOpnd. + idx: i32, + /// Number of bits for Opnd::Reg and Opnd::Mem. + num_bits: u8, + /// ctx.stack_size when this operand is made. Used with idx for Opnd::Reg. + stack_size: u8, + /// The number of local variables in the current ISEQ. Used only for locals. + num_locals: Option<u32>, + /// ctx.sp_offset when this operand is made. Used with idx for Opnd::Mem. + sp_offset: i8, + /// ctx.reg_mapping when this operand is read. Used for register allocation. + reg_mapping: Option<RegMapping> + }, + // Low-level operands, for lowering Imm(i64), // Raw signed immediate UImm(u64), // Raw unsigned immediate @@ -85,6 +98,8 @@ impl fmt::Debug for Opnd { match self { Self::None => write!(fmt, "None"), Value(val) => write!(fmt, "Value({val:?})"), + CArg(reg) => write!(fmt, "CArg({reg:?})"), + Stack { idx, sp_offset, .. } => write!(fmt, "SP[{}]", *sp_offset as i32 - idx - 1), InsnOut { idx, num_bits } => write!(fmt, "Out{num_bits}({idx})"), Imm(signed) => write!(fmt, "{signed:x}_i64"), UImm(unsigned) => write!(fmt, "{unsigned:x}_u64"), @@ -127,10 +142,11 @@ impl Opnd Opnd::UImm(ptr as u64) } - pub fn is_some(&self) -> bool { - match *self { - Opnd::None => false, - _ => true, + /// Constructor for a C argument operand + pub fn c_arg(reg_opnd: Opnd) -> Self { + match reg_opnd { + Opnd::Reg(reg) => Opnd::CArg(reg), + _ => unreachable!(), } } @@ -158,6 +174,7 @@ impl Opnd Opnd::Reg(reg) => Some(Opnd::Reg(reg.with_num_bits(num_bits))), Opnd::Mem(Mem { base, disp, .. }) => Some(Opnd::Mem(Mem { base, disp, num_bits })), Opnd::InsnOut { idx, .. } => Some(Opnd::InsnOut { idx, num_bits }), + Opnd::Stack { idx, stack_size, num_locals, sp_offset, reg_mapping, .. } => Some(Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, reg_mapping }), _ => None, } } @@ -211,6 +228,29 @@ impl Opnd pub fn match_num_bits(opnds: &[Opnd]) -> u8 { Self::match_num_bits_iter(opnds.iter()) } + + /// Convert Opnd::Stack into RegMapping + pub fn reg_opnd(&self) -> RegOpnd { + self.get_reg_opnd().unwrap() + } + + /// Convert an operand into RegMapping if it's Opnd::Stack + pub fn get_reg_opnd(&self) -> Option<RegOpnd> { + match *self { + Opnd::Stack { idx, stack_size, num_locals, .. 
} => Some( + if let Some(num_locals) = num_locals { + let last_idx = stack_size as i32 + VM_ENV_DATA_SIZE as i32 - 1; + assert!(last_idx <= idx, "Local index {} must be >= last local index {}", idx, last_idx); + assert!(idx <= last_idx + num_locals as i32, "Local index {} must be < last local index {} + local size {}", idx, last_idx, num_locals); + RegOpnd::Local((last_idx + num_locals as i32 - idx) as u8) + } else { + assert!(idx < stack_size as i32); + RegOpnd::Stack((stack_size as i32 - idx - 1) as u8) + } + ), + _ => None, + } + } } impl From<usize> for Opnd { @@ -254,13 +294,22 @@ impl From<VALUE> for Opnd { #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum Target { - CodePtr(CodePtr), // Pointer to a piece of YJIT-generated code - SideExitPtr(CodePtr), // Pointer to a side exit code - Label(usize), // A label within the generated code + /// Pointer to a piece of YJIT-generated code + CodePtr(CodePtr), + /// Side exit with a counter + SideExit { counter: Counter, context: Option<SideExitContext> }, + /// Pointer to a side exit code + SideExitPtr(CodePtr), + /// A label within the generated code + Label(usize), } impl Target { + pub fn side_exit(counter: Counter) -> Target { + Target::SideExit { counter, context: None } + } + pub fn unwrap_label_idx(&self) -> usize { match self { Target::Label(idx) => *idx, @@ -283,7 +332,7 @@ impl From<CodePtr> for Target { } } -type PosMarkerFn = Box<dyn Fn(CodePtr)>; +type PosMarkerFn = Box<dyn Fn(CodePtr, &CodeBlock)>; /// YJIT IR instruction pub enum Insn { @@ -298,6 +347,7 @@ pub enum Insn { BakeString(String), // Trigger a debugger breakpoint + #[allow(dead_code)] Breakpoint, /// Add a comment into the IR at the point that this instruction is added. @@ -363,15 +413,24 @@ pub enum Insn { // Produces no output IncrCounter { mem: Opnd, value: Opnd }, - /// Jump if below or equal + /// Jump if below or equal (unsigned) Jbe(Target), + /// Jump if below (unsigned) + Jb(Target), + /// Jump if equal Je(Target), /// Jump if lower Jl(Target), + /// Jump if greater + Jg(Target), + + /// Jump if greater or equal + Jge(Target), + // Unconditional jump to a branch target Jmp(Target), @@ -387,15 +446,23 @@ pub enum Insn { /// Jump if overflow Jo(Target), + /// Jump if overflow in multiplication + JoMul(Target), + /// Jump if zero Jz(Target), + /// Jump if operand is zero (only used during lowering at the moment) + Joz(Opnd, Target), + + /// Jump if operand is non-zero (only used during lowering at the moment) + Jonz(Opnd, Target), + // Add a label into the IR at the point that this instruction is added. Label(Target), - // Load effective address relative to the current instruction pointer. It - // accepts a single signed immediate operand. - LeaLabel { target: Target, out: Opnd }, + /// Get the code address of a jump target + LeaJumpTarget { target: Target, out: Opnd }, // Load effective address Lea { opnd: Opnd, out: Opnd }, @@ -428,7 +495,7 @@ pub enum Insn { // binary OR operation. Or { left: Opnd, right: Opnd, out: Opnd }, - /// Pad nop instructions to accomodate Op::Jmp in case the block or the insn + /// Pad nop instructions to accommodate Op::Jmp in case the block or the insn /// is invalidated. PadInvalPatch, @@ -441,9 +508,12 @@ pub enum Insn { // Low-level instruction to store a value to memory. Store { dest: Opnd, src: Opnd }, - // This is the same as the OP_ADD instruction, except for subtraction. + // This is the same as the add instruction, except for subtraction. 
Sub { left: Opnd, right: Opnd, out: Opnd }, + // Integer multiplication + Mul { left: Opnd, right: Opnd, out: Opnd }, + // Bitwise AND test instruction Test { left: Opnd, right: Opnd }, @@ -458,16 +528,41 @@ pub enum Insn { impl Insn { /// Create an iterator that will yield a non-mutable reference to each /// operand in turn for this instruction. - pub(super) fn opnd_iter(&self) -> InsnOpndIterator { + pub(super) fn opnd_iter(&self) -> InsnOpndIterator<'_> { InsnOpndIterator::new(self) } /// Create an iterator that will yield a mutable reference to each operand /// in turn for this instruction. - pub(super) fn opnd_iter_mut(&mut self) -> InsnOpndMutIterator { + pub(super) fn opnd_iter_mut(&mut self) -> InsnOpndMutIterator<'_> { InsnOpndMutIterator::new(self) } + /// Get a mutable reference to a Target if it exists. + pub(super) fn target_mut(&mut self) -> Option<&mut Target> { + match self { + Insn::Jbe(target) | + Insn::Jb(target) | + Insn::Je(target) | + Insn::Jl(target) | + Insn::Jg(target) | + Insn::Jge(target) | + Insn::Jmp(target) | + Insn::Jne(target) | + Insn::Jnz(target) | + Insn::Jo(target) | + Insn::Jz(target) | + Insn::Label(target) | + Insn::JoMul(target) | + Insn::Joz(_, target) | + Insn::Jonz(_, target) | + Insn::LeaJumpTarget { target, .. } => { + Some(target) + } + _ => None, + } + } + /// Returns a string that describes which operation this instruction is /// performing. This is used for debugging. fn op(&self) -> &'static str { @@ -497,16 +592,22 @@ impl Insn { Insn::FrameTeardown => "FrameTeardown", Insn::IncrCounter { .. } => "IncrCounter", Insn::Jbe(_) => "Jbe", + Insn::Jb(_) => "Jb", Insn::Je(_) => "Je", Insn::Jl(_) => "Jl", + Insn::Jg(_) => "Jg", + Insn::Jge(_) => "Jge", Insn::Jmp(_) => "Jmp", Insn::JmpOpnd(_) => "JmpOpnd", Insn::Jne(_) => "Jne", Insn::Jnz(_) => "Jnz", Insn::Jo(_) => "Jo", + Insn::JoMul(_) => "JoMul", Insn::Jz(_) => "Jz", + Insn::Joz(..) => "Joz", + Insn::Jonz(..) => "Jonz", Insn::Label(_) => "Label", - Insn::LeaLabel { .. } => "LeaLabel", + Insn::LeaJumpTarget { .. } => "LeaJumpTarget", Insn::Lea { .. } => "Lea", Insn::LiveReg { .. } => "LiveReg", Insn::Load { .. } => "Load", @@ -521,6 +622,7 @@ impl Insn { Insn::RShift { .. } => "RShift", Insn::Store { .. } => "Store", Insn::Sub { .. } => "Sub", + Insn::Mul { .. } => "Mul", Insn::Test { .. } => "Test", Insn::URShift { .. } => "URShift", Insn::Xor { .. } => "Xor" @@ -544,7 +646,7 @@ impl Insn { Insn::CSelNZ { out, .. } | Insn::CSelZ { out, .. } | Insn::Lea { out, .. } | - Insn::LeaLabel { out, .. } | + Insn::LeaJumpTarget { out, .. } | Insn::LiveReg { out, .. } | Insn::Load { out, .. } | Insn::LoadSExt { out, .. } | @@ -553,6 +655,7 @@ impl Insn { Insn::Or { out, .. } | Insn::RShift { out, .. } | Insn::Sub { out, .. } | + Insn::Mul { out, .. } | Insn::URShift { out, .. } | Insn::Xor { out, .. } => Some(out), _ => None @@ -576,7 +679,7 @@ impl Insn { Insn::CSelNZ { out, .. } | Insn::CSelZ { out, .. } | Insn::Lea { out, .. } | - Insn::LeaLabel { out, .. } | + Insn::LeaJumpTarget { out, .. } | Insn::LiveReg { out, .. } | Insn::Load { out, .. } | Insn::LoadSExt { out, .. } | @@ -585,6 +688,7 @@ impl Insn { Insn::Or { out, .. } | Insn::RShift { out, .. } | Insn::Sub { out, .. } | + Insn::Mul { out, .. } | Insn::URShift { out, .. } | Insn::Xor { out, .. 
} => Some(out), _ => None @@ -595,14 +699,17 @@ impl Insn { pub fn target(&self) -> Option<&Target> { match self { Insn::Jbe(target) | + Insn::Jb(target) | Insn::Je(target) | Insn::Jl(target) | + Insn::Jg(target) | + Insn::Jge(target) | Insn::Jmp(target) | Insn::Jne(target) | Insn::Jnz(target) | Insn::Jo(target) | Insn::Jz(target) | - Insn::LeaLabel { target, .. } => Some(target), + Insn::LeaJumpTarget { target, .. } => Some(target), _ => None } } @@ -644,17 +751,22 @@ impl<'a> Iterator for InsnOpndIterator<'a> { Insn::FrameSetup | Insn::FrameTeardown | Insn::Jbe(_) | + Insn::Jb(_) | Insn::Je(_) | Insn::Jl(_) | + Insn::Jg(_) | + Insn::Jge(_) | Insn::Jmp(_) | Insn::Jne(_) | Insn::Jnz(_) | Insn::Jo(_) | + Insn::JoMul(_) | Insn::Jz(_) | Insn::Label(_) | - Insn::LeaLabel { .. } | + Insn::LeaJumpTarget { .. } | Insn::PadInvalPatch | Insn::PosMarker(_) => None, + Insn::CPopInto(opnd) | Insn::CPush(opnd) | Insn::CRet(opnd) | @@ -663,6 +775,8 @@ impl<'a> Iterator for InsnOpndIterator<'a> { Insn::LiveReg { opnd, .. } | Insn::Load { opnd, .. } | Insn::LoadSExt { opnd, .. } | + Insn::Joz(opnd, _) | + Insn::Jonz(opnd, _) | Insn::Not { opnd, .. } => { match self.idx { 0 => { @@ -691,6 +805,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> { Insn::RShift { opnd: opnd0, shift: opnd1, .. } | Insn::Store { dest: opnd0, src: opnd1 } | Insn::Sub { left: opnd0, right: opnd1, .. } | + Insn::Mul { left: opnd0, right: opnd1, .. } | Insn::Test { left: opnd0, right: opnd1 } | Insn::URShift { opnd: opnd0, shift: opnd1, .. } | Insn::Xor { left: opnd0, right: opnd1, .. } => { @@ -741,17 +856,22 @@ impl<'a> InsnOpndMutIterator<'a> { Insn::FrameSetup | Insn::FrameTeardown | Insn::Jbe(_) | + Insn::Jb(_) | Insn::Je(_) | Insn::Jl(_) | + Insn::Jg(_) | + Insn::Jge(_) | Insn::Jmp(_) | Insn::Jne(_) | Insn::Jnz(_) | Insn::Jo(_) | + Insn::JoMul(_) | Insn::Jz(_) | Insn::Label(_) | - Insn::LeaLabel { .. } | + Insn::LeaJumpTarget { .. } | Insn::PadInvalPatch | Insn::PosMarker(_) => None, + Insn::CPopInto(opnd) | Insn::CPush(opnd) | Insn::CRet(opnd) | @@ -760,6 +880,8 @@ impl<'a> InsnOpndMutIterator<'a> { Insn::LiveReg { opnd, .. } | Insn::Load { opnd, .. } | Insn::LoadSExt { opnd, .. } | + Insn::Joz(opnd, _) | + Insn::Jonz(opnd, _) | Insn::Not { opnd, .. } => { match self.idx { 0 => { @@ -788,6 +910,7 @@ impl<'a> InsnOpndMutIterator<'a> { Insn::RShift { opnd: opnd0, shift: opnd1, .. } | Insn::Store { dest: opnd0, src: opnd1 } | Insn::Sub { left: opnd0, right: opnd1, .. } | + Insn::Mul { left: opnd0, right: opnd1, .. } | Insn::Test { left: opnd0, right: opnd1 } | Insn::URShift { opnd: opnd0, shift: opnd1, .. } | Insn::Xor { left: opnd0, right: opnd1, .. 
} => { @@ -842,10 +965,60 @@ impl fmt::Debug for Insn { } } +/// Set of variables used for generating side exits +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct SideExitContext { + /// PC of the instruction being compiled + pub pc: *mut VALUE, + + /// Context fields used by get_generic_ctx() + pub stack_size: u8, + pub sp_offset: i8, + pub reg_mapping: RegMapping, + pub is_return_landing: bool, + pub is_deferred: bool, +} + +impl SideExitContext { + /// Convert PC and Context into SideExitContext + pub fn new(pc: *mut VALUE, ctx: Context) -> Self { + let exit_ctx = SideExitContext { + pc, + stack_size: ctx.get_stack_size(), + sp_offset: ctx.get_sp_offset(), + reg_mapping: ctx.get_reg_mapping(), + is_return_landing: ctx.is_return_landing(), + is_deferred: ctx.is_deferred(), + }; + if cfg!(debug_assertions) { + // Assert that we're not losing any mandatory metadata + assert_eq!(exit_ctx.get_ctx(), ctx.get_generic_ctx()); + } + exit_ctx + } + + /// Convert SideExitContext to Context + fn get_ctx(&self) -> Context { + let mut ctx = Context::default(); + ctx.set_stack_size(self.stack_size); + ctx.set_sp_offset(self.sp_offset); + ctx.set_reg_mapping(self.reg_mapping); + if self.is_return_landing { + ctx.set_as_return_landing(); + } + if self.is_deferred { + ctx.mark_as_deferred(); + } + ctx + } +} + +/// Initial capacity for asm.insns vector +const ASSEMBLER_INSNS_CAPACITY: usize = 256; + /// Object into which we assemble instructions to be /// optimized and lowered -pub struct Assembler -{ +pub struct Assembler { pub(super) insns: Vec<Insn>, /// Parallel vec with insns @@ -854,22 +1027,81 @@ pub struct Assembler /// Names of labels pub(super) label_names: Vec<String>, + + /// Context for generating the current insn + pub ctx: Context, + + /// The current ISEQ's local table size. asm.local_opnd() uses this, and it's + /// sometimes hard to pass this value, e.g. asm.spill_regs() in asm.ccall(). + /// + /// `None` means we're not assembling for an ISEQ, or that the local size is + /// not relevant. + pub(super) num_locals: Option<u32>, + + /// Side exit caches for each SideExitContext + pub(super) side_exits: HashMap<SideExitContext, CodePtr>, + + /// PC for Target::SideExit + side_exit_pc: Option<*mut VALUE>, + + /// Stack size for Target::SideExit + side_exit_stack_size: Option<u8>, + + /// If true, the next ccall() should verify its leafness + leaf_ccall: bool, } impl Assembler { - pub fn new() -> Self { - Self::new_with_label_names(Vec::default()) + /// Create an Assembler for ISEQ-specific code. + /// It includes all inline code and some outlined code like side exits and stubs. + pub fn new(num_locals: u32) -> Self { + Self::new_with_label_names(Vec::default(), HashMap::default(), Some(num_locals)) + } + + /// Create an Assembler for outlined code that are not specific to any ISEQ, + /// e.g. trampolines that are shared globally. + pub fn new_without_iseq() -> Self { + Self::new_with_label_names(Vec::default(), HashMap::default(), None) } - pub fn new_with_label_names(label_names: Vec<String>) -> Self { + /// Create an Assembler with parameters that are populated by another Assembler instance. + /// This API is used for copying an Assembler for the next compiler pass. 
+ pub fn new_with_label_names( + label_names: Vec<String>, + side_exits: HashMap<SideExitContext, CodePtr>, + num_locals: Option<u32> + ) -> Self { Self { - insns: Vec::default(), - live_ranges: Vec::default(), - label_names + insns: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY), + live_ranges: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY), + label_names, + ctx: Context::default(), + num_locals, + side_exits, + side_exit_pc: None, + side_exit_stack_size: None, + leaf_ccall: false, } } + /// Get the list of registers that can be used for stack temps. + pub fn get_temp_regs() -> &'static [Reg] { + let num_regs = get_option!(num_temp_regs); + &TEMP_REGS[0..num_regs] + } + + /// Get the number of locals for the ISEQ being compiled + pub fn get_num_locals(&self) -> Option<u32> { + self.num_locals + } + + /// Set a context for generating side exits + pub fn set_side_exit_context(&mut self, pc: *mut VALUE, stack_size: u8) { + self.side_exit_pc = Some(pc); + self.side_exit_stack_size = Some(stack_size); + } + /// Build an Opnd::InsnOut from the current index of the assembler and the /// given number of bits. pub(super) fn next_opnd_out(&self, num_bits: u8) -> Opnd { @@ -879,31 +1111,75 @@ impl Assembler /// Append an instruction onto the current list of instructions and update /// the live ranges of any instructions whose outputs are being used as /// operands to this instruction. - pub(super) fn push_insn(&mut self, insn: Insn) { + pub fn push_insn(&mut self, mut insn: Insn) { // Index of this instruction let insn_idx = self.insns.len(); - // If we find any InsnOut from previous instructions, we're going to - // update the live range of the previous instruction to point to this - // one. - for opnd in insn.opnd_iter() { - match opnd { + let mut opnd_iter = insn.opnd_iter_mut(); + while let Some(opnd) = opnd_iter.next() { + match *opnd { + // If we find any InsnOut from previous instructions, we're going to update + // the live range of the previous instruction to point to this one. Opnd::InsnOut { idx, .. } => { - assert!(*idx < self.insns.len()); - self.live_ranges[*idx] = insn_idx; + assert!(idx < self.insns.len()); + self.live_ranges[idx] = insn_idx; } Opnd::Mem(Mem { base: MemBase::InsnOut(idx), .. }) => { - assert!(*idx < self.insns.len()); - self.live_ranges[*idx] = insn_idx; + assert!(idx < self.insns.len()); + self.live_ranges[idx] = insn_idx; + } + // Set current ctx.reg_mapping to Opnd::Stack. + Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, reg_mapping: None } => { + assert_eq!( + self.ctx.get_stack_size() as i16 - self.ctx.get_sp_offset() as i16, + stack_size as i16 - sp_offset as i16, + "Opnd::Stack (stack_size: {}, sp_offset: {}) expects a different SP position from asm.ctx (stack_size: {}, sp_offset: {})", + stack_size, sp_offset, self.ctx.get_stack_size(), self.ctx.get_sp_offset(), + ); + *opnd = Opnd::Stack { + idx, + num_bits, + stack_size, + num_locals, + sp_offset, + reg_mapping: Some(self.ctx.get_reg_mapping()), + }; } _ => {} } } + // Set a side exit context to Target::SideExit + if let Some(Target::SideExit { context, .. }) = insn.target_mut() { + // We should skip this when this instruction is being copied from another Assembler. 
+ if context.is_none() { + *context = Some(SideExitContext::new( + self.side_exit_pc.unwrap(), + self.ctx.with_stack_size(self.side_exit_stack_size.unwrap()), + )); + } + } + self.insns.push(insn); self.live_ranges.push(insn_idx); } + /// Get a cached side exit, wrapping a counter if specified + pub fn get_side_exit(&mut self, side_exit_context: &SideExitContext, counter: Option<Counter>, ocb: &mut OutlinedCb) -> Option<CodePtr> { + // Get a cached side exit + let side_exit = match self.side_exits.get(&side_exit_context) { + None => { + let exit_code = gen_outlined_exit(side_exit_context.pc, self.num_locals.unwrap(), &side_exit_context.get_ctx(), ocb)?; + self.side_exits.insert(*side_exit_context, exit_code); + exit_code + } + Some(code_ptr) => *code_ptr, + }; + + // Wrap a counter if needed + gen_counted_exit(side_exit_context.pc, side_exit, ocb, counter) + } + /// Create a new label instance that we can jump to pub fn new_label(&mut self, name: &str) -> Target { @@ -914,6 +1190,198 @@ impl Assembler Target::Label(label_idx) } + /// Convert Opnd::Stack to Opnd::Mem or Opnd::Reg + pub fn lower_stack_opnd(&self, opnd: &Opnd) -> Opnd { + // Convert Opnd::Stack to Opnd::Mem + fn mem_opnd(opnd: &Opnd) -> Opnd { + if let Opnd::Stack { idx, sp_offset, num_bits, .. } = *opnd { + incr_counter!(temp_mem_opnd); + Opnd::mem(num_bits, SP, (sp_offset as i32 - idx - 1) * SIZEOF_VALUE_I32) + } else { + unreachable!() + } + } + + // Convert Opnd::Stack to Opnd::Reg + fn reg_opnd(opnd: &Opnd, reg_idx: usize) -> Opnd { + let regs = Assembler::get_temp_regs(); + if let Opnd::Stack { num_bits, .. } = *opnd { + incr_counter!(temp_reg_opnd); + Opnd::Reg(regs[reg_idx]).with_num_bits(num_bits).unwrap() + } else { + unreachable!() + } + } + + match opnd { + Opnd::Stack { reg_mapping, .. } => { + if let Some(reg_idx) = reg_mapping.unwrap().get_reg(opnd.reg_opnd()) { + reg_opnd(opnd, reg_idx) + } else { + mem_opnd(opnd) + } + } + _ => unreachable!(), + } + } + + /// Allocate a register to a stack temp if available. + pub fn alloc_reg(&mut self, mapping: RegOpnd) { + // Allocate a register if there's no conflict. + let mut reg_mapping = self.ctx.get_reg_mapping(); + if reg_mapping.alloc_reg(mapping) { + self.set_reg_mapping(reg_mapping); + } + } + + /// Erase local variable type information + /// eg: because of a call we can't track + pub fn clear_local_types(&mut self) { + asm_comment!(self, "clear local variable types"); + self.ctx.clear_local_types(); + } + + /// Repurpose stack temp registers to the corresponding locals for arguments + pub fn map_temp_regs_to_args(&mut self, callee_ctx: &mut Context, argc: i32) -> Vec<RegOpnd> { + let mut callee_reg_mapping = callee_ctx.get_reg_mapping(); + let mut mapped_temps = vec![]; + + for arg_idx in 0..argc { + let stack_idx: u8 = (self.ctx.get_stack_size() as i32 - argc + arg_idx).try_into().unwrap(); + let temp_opnd = RegOpnd::Stack(stack_idx); + + // For each argument, if the stack temp for it has a register, + // let the callee use the register for the local variable. 
+ if let Some(reg_idx) = self.ctx.get_reg_mapping().get_reg(temp_opnd) { + let local_opnd = RegOpnd::Local(arg_idx.try_into().unwrap()); + callee_reg_mapping.set_reg(local_opnd, reg_idx); + mapped_temps.push(temp_opnd); + } + } + + asm_comment!(self, "local maps: {:?}", callee_reg_mapping); + callee_ctx.set_reg_mapping(callee_reg_mapping); + mapped_temps + } + + /// Spill all live registers to the stack + pub fn spill_regs(&mut self) { + self.spill_regs_except(&vec![]); + } + + /// Spill all live registers except `ignored_temps` to the stack + pub fn spill_regs_except(&mut self, ignored_temps: &Vec<RegOpnd>) { + // Forget registers above the stack top + let mut reg_mapping = self.ctx.get_reg_mapping(); + for stack_idx in self.ctx.get_stack_size()..MAX_CTX_TEMPS as u8 { + reg_mapping.dealloc_reg(RegOpnd::Stack(stack_idx)); + } + self.set_reg_mapping(reg_mapping); + + // If no registers are in use, skip all checks + if self.ctx.get_reg_mapping() == RegMapping::default() { + return; + } + + // Collect stack temps to be spilled + let mut spilled_opnds = vec![]; + for stack_idx in 0..u8::min(MAX_CTX_TEMPS as u8, self.ctx.get_stack_size()) { + let reg_opnd = RegOpnd::Stack(stack_idx); + if !ignored_temps.contains(®_opnd) && reg_mapping.dealloc_reg(reg_opnd) { + let idx = self.ctx.get_stack_size() - 1 - stack_idx; + let spilled_opnd = self.stack_opnd(idx.into()); + spilled_opnds.push(spilled_opnd); + reg_mapping.dealloc_reg(spilled_opnd.reg_opnd()); + } + } + + // Collect locals to be spilled + for local_idx in 0..MAX_CTX_TEMPS as u8 { + if reg_mapping.dealloc_reg(RegOpnd::Local(local_idx)) { + let first_local_ep_offset = self.num_locals.unwrap() + VM_ENV_DATA_SIZE - 1; + let ep_offset = first_local_ep_offset - local_idx as u32; + let spilled_opnd = self.local_opnd(ep_offset); + spilled_opnds.push(spilled_opnd); + reg_mapping.dealloc_reg(spilled_opnd.reg_opnd()); + } + } + + // Spill stack temps and locals + if !spilled_opnds.is_empty() { + asm_comment!(self, "spill_regs: {:?} -> {:?}", self.ctx.get_reg_mapping(), reg_mapping); + for &spilled_opnd in spilled_opnds.iter() { + self.spill_reg(spilled_opnd); + } + self.ctx.set_reg_mapping(reg_mapping); + } + } + + /// Spill a stack temp from a register to the stack + pub fn spill_reg(&mut self, opnd: Opnd) { + assert_ne!(self.ctx.get_reg_mapping().get_reg(opnd.reg_opnd()), None); + + // Use different RegMappings for dest and src operands + let reg_mapping = self.ctx.get_reg_mapping(); + let mut mem_mappings = reg_mapping; + mem_mappings.dealloc_reg(opnd.reg_opnd()); + + // Move the stack operand from a register to memory + match opnd { + Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, .. } => { + self.mov( + Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, reg_mapping: Some(mem_mappings) }, + Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, reg_mapping: Some(reg_mapping) }, + ); + } + _ => unreachable!(), + } + incr_counter!(temp_spill); + } + + /// Update which stack temps are in a register + pub fn set_reg_mapping(&mut self, reg_mapping: RegMapping) { + if self.ctx.get_reg_mapping() != reg_mapping { + asm_comment!(self, "reg_mapping: {:?} -> {:?}", self.ctx.get_reg_mapping(), reg_mapping); + self.ctx.set_reg_mapping(reg_mapping); + } + } + + // Shuffle register moves, sometimes adding extra moves using SCRATCH_REG, + // so that they will not rewrite each other before they are used. 
+ pub fn reorder_reg_moves(old_moves: &Vec<(Reg, Opnd)>) -> Vec<(Reg, Opnd)> { + // Return the index of a move whose destination is not used as a source if any. + fn find_safe_move(moves: &Vec<(Reg, Opnd)>) -> Option<usize> { + moves.iter().enumerate().find(|(_, &(dest_reg, _))| { + moves.iter().all(|&(_, src_opnd)| src_opnd != Opnd::Reg(dest_reg)) + }).map(|(index, _)| index) + } + + // Remove moves whose source and destination are the same + let mut old_moves: Vec<(Reg, Opnd)> = old_moves.clone().into_iter() + .filter(|&(reg, opnd)| Opnd::Reg(reg) != opnd).collect(); + + let mut new_moves = vec![]; + while old_moves.len() > 0 { + // Keep taking safe moves + while let Some(index) = find_safe_move(&old_moves) { + new_moves.push(old_moves.remove(index)); + } + + // No safe move. Load the source of one move into SCRATCH_REG, and + // then load SCRATCH_REG into the destination when it's safe. + if old_moves.len() > 0 { + // Make sure it's safe to use SCRATCH_REG + assert!(old_moves.iter().all(|&(_, opnd)| opnd != Opnd::Reg(Assembler::SCRATCH_REG))); + + // Move SCRATCH <- opnd, and delay reg <- SCRATCH + let (reg, opnd) = old_moves.remove(0); + new_moves.push((Assembler::SCRATCH_REG, opnd)); + old_moves.push((reg, Opnd::Reg(Assembler::SCRATCH_REG))); + } + } + new_moves + } + /// Sets the out field on the various instructions that require allocated /// registers because their output is used as the operand on a subsequent /// instruction. This is our implementation of the linear scan algorithm. @@ -959,6 +1427,19 @@ impl Assembler } } + // Adjust the number of entries in live_ranges so that it can be indexed by mapped indexes. + fn shift_live_ranges(live_ranges: &mut Vec<usize>, start_index: usize, shift_offset: isize) { + if shift_offset >= 0 { + for index in 0..(shift_offset as usize) { + live_ranges.insert(start_index + index, start_index + index); + } + } else { + for _ in 0..-shift_offset { + live_ranges.remove(start_index); + } + } + } + // Dump live registers for register spill debugging. fn dump_live_regs(insns: Vec<Insn>, live_ranges: Vec<usize>, num_regs: usize, spill_index: usize) { // Convert live_ranges to live_regs: the number of live registers at each index @@ -982,11 +1463,18 @@ impl Assembler } } + // We may need to reorder LoadInto instructions with a C argument operand. + // This buffers the operands of such instructions to process them in batches. + let mut c_args: Vec<(Reg, Opnd)> = vec![]; + + // live_ranges is indexed by original `index` given by the iterator. let live_ranges: Vec<usize> = take(&mut self.live_ranges); - let mut asm = Assembler::new_with_label_names(take(&mut self.label_names)); + // shifted_live_ranges is indexed by mapped indexes in insn operands. + let mut shifted_live_ranges: Vec<usize> = live_ranges.clone(); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits), self.num_locals); let mut iterator = self.into_draining_iter(); - while let Some((index, mut insn)) = iterator.next_unmapped() { + while let Some((index, mut insn)) = iterator.next_mapped() { // Check if this is the last instruction that uses an operand that // spans more than one instruction. In that case, return the // allocated register to the pool. @@ -997,12 +1485,11 @@ impl Assembler // Since we have an InsnOut, we know it spans more that one // instruction. let start_index = *idx; - assert!(start_index < index); // We're going to check if this is the last instruction that // uses this operand. 
If it is, we can return the allocated // register to the pool. - if live_ranges[start_index] == index { + if shifted_live_ranges[start_index] == index { if let Some(Opnd::Reg(reg)) = asm.insns[start_index].out_opnd() { dealloc_reg(&mut pool, ®s, reg); } else { @@ -1049,7 +1536,7 @@ impl Assembler let mut opnd_iter = insn.opnd_iter(); if let Some(Opnd::InsnOut{ idx, .. }) = opnd_iter.next() { - if live_ranges[*idx] == index { + if shifted_live_ranges[*idx] == index { if let Some(Opnd::Reg(reg)) = asm.insns[*idx].out_opnd() { out_reg = Some(take_reg(&mut pool, ®s, reg)); } @@ -1106,39 +1593,57 @@ impl Assembler } } - asm.push_insn(insn); + // Push instruction(s). Batch and reorder C argument operations if needed. + if let Insn::LoadInto { dest: Opnd::CArg(reg), opnd } = insn { + // Buffer C arguments + c_args.push((reg, opnd)); + } else { + // C arguments are buffered until CCall + if c_args.len() > 0 { + // Resolve C argument dependencies + let c_args_len = c_args.len() as isize; + let moves = Self::reorder_reg_moves(&std::mem::take(&mut c_args)); + shift_live_ranges(&mut shifted_live_ranges, asm.insns.len(), moves.len() as isize - c_args_len); + + // Push batched C arguments + for (reg, opnd) in moves { + asm.load_into(Opnd::Reg(reg), opnd); + } + } + // Other instructions are pushed as is + asm.push_insn(insn); + } + iterator.map_insn_index(&mut asm); } assert_eq!(pool, 0, "Expected all registers to be returned to the pool"); asm } - /// Compile the instructions down to machine code - /// NOTE: should compile return a list of block labels to enable - /// compiling multiple blocks at a time? - pub fn compile(self, cb: &mut CodeBlock) -> Vec<u32> + /// Compile the instructions down to machine code. + /// Can fail due to lack of code memory and inopportune code placement, among other reasons. + #[must_use] + pub fn compile(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>) -> Option<(CodePtr, Vec<u32>)> { - #[cfg(feature = "disasm")] let start_addr = cb.get_write_ptr(); - let alloc_regs = Self::get_alloc_regs(); - let gc_offsets = self.compile_with_regs(cb, alloc_regs); + let ret = self.compile_with_regs(cb, ocb, alloc_regs); - #[cfg(feature = "disasm")] if let Some(dump_disasm) = get_option_ref!(dump_disasm) { use crate::disasm::dump_disasm_addr_range; let end_addr = cb.get_write_ptr(); dump_disasm_addr_range(cb, start_addr, end_addr, dump_disasm) } - gc_offsets + ret } /// Compile with a limited number of registers. Used only for unit tests. - pub fn compile_with_num_regs(self, cb: &mut CodeBlock, num_regs: usize) -> Vec<u32> + #[cfg(test)] + pub fn compile_with_num_regs(self, cb: &mut CodeBlock, num_regs: usize) -> (CodePtr, Vec<u32>) { let mut alloc_regs = Self::get_alloc_regs(); let alloc_regs = alloc_regs.drain(0..num_regs).collect(); - self.compile_with_regs(cb, alloc_regs) + self.compile_with_regs(cb, None, alloc_regs).unwrap() } /// Consume the assembler by creating a new draining iterator. @@ -1146,16 +1651,21 @@ impl Assembler AssemblerDrainingIterator::new(self) } - /// Consume the assembler by creating a new lookback iterator. - pub fn into_lookback_iter(self) -> AssemblerLookbackIterator { - AssemblerLookbackIterator::new(self) + /// Return true if the next ccall() is expected to be leaf. + pub fn get_leaf_ccall(&mut self) -> bool { + self.leaf_ccall + } + + /// Assert that the next ccall() is going to be leaf. 
+ pub fn expect_leaf_ccall(&mut self) { + self.leaf_ccall = true; } } /// A struct that allows iterating through an assembler's instructions and /// consuming them as it iterates. pub struct AssemblerDrainingIterator { - insns: std::vec::IntoIter<Insn>, + insns: std::iter::Peekable<std::vec::IntoIter<Insn>>, index: usize, indices: Vec<usize> } @@ -1163,9 +1673,9 @@ pub struct AssemblerDrainingIterator { impl AssemblerDrainingIterator { fn new(asm: Assembler) -> Self { Self { - insns: asm.insns.into_iter(), + insns: asm.insns.into_iter().peekable(), index: 0, - indices: Vec::default() + indices: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY), } } @@ -1177,10 +1687,11 @@ impl AssemblerDrainingIterator { /// end of the current list of instructions in order to maintain that /// alignment. pub fn map_insn_index(&mut self, asm: &mut Assembler) { - self.indices.push(asm.insns.len() - 1); + self.indices.push(asm.insns.len().saturating_sub(1)); } /// Map an operand by using this iterator's list of mapped indices. + #[cfg(target_arch = "x86_64")] pub fn map_opnd(&self, opnd: Opnd) -> Opnd { opnd.map_index(&self.indices) } @@ -1205,51 +1716,10 @@ impl AssemblerDrainingIterator { self.index += 1; self.insns.next().map(|insn| (index, insn)) } -} - -/// A struct that allows iterating through references to an assembler's -/// instructions without consuming them. -pub struct AssemblerLookbackIterator { - asm: Assembler, - index: Cell<usize> -} - -impl AssemblerLookbackIterator { - fn new(asm: Assembler) -> Self { - Self { asm, index: Cell::new(0) } - } - - /// Fetches a reference to an instruction at a specific index. - pub fn get(&self, index: usize) -> Option<&Insn> { - self.asm.insns.get(index) - } - /// Fetches a reference to an instruction in the list relative to the - /// current cursor location of this iterator. - pub fn get_relative(&self, difference: i32) -> Option<&Insn> { - let index: Result<i32, _> = self.index.get().try_into(); - let relative: Result<usize, _> = index.and_then(|value| (value + difference).try_into()); - relative.ok().and_then(|value| self.asm.insns.get(value)) - } - - /// Fetches the previous instruction relative to the current cursor location - /// of this iterator. - pub fn get_previous(&self) -> Option<&Insn> { - self.get_relative(-1) - } - - /// Fetches the next instruction relative to the current cursor location of - /// this iterator. - pub fn get_next(&self) -> Option<&Insn> { - self.get_relative(1) - } - - /// Returns the next instruction in the list with the indices corresponding - /// to the previous list of instructions. - pub fn next_unmapped(&self) -> Option<(usize, &Insn)> { - let index = self.index.get(); - self.index.set(index + 1); - self.asm.insns.get(index).map(|insn| (index, insn)) + /// Returns the next instruction without incrementing the iterator's index. + pub fn peek(&mut self) -> Option<&Insn> { + self.insns.peek() } } @@ -1284,22 +1754,67 @@ impl Assembler { self.push_insn(Insn::BakeString(text.to_string())); } + #[allow(dead_code)] pub fn breakpoint(&mut self) { self.push_insn(Insn::Breakpoint); } pub fn ccall(&mut self, fptr: *const u8, opnds: Vec<Opnd>) -> Opnd { + // Let vm_check_canary() assert this ccall's leafness if leaf_ccall is set + let canary_opnd = self.set_stack_canary(&opnds); + + let old_temps = self.ctx.get_reg_mapping(); // with registers + // Spill stack temp registers since they are caller-saved registers. + // Note that this doesn't spill stack temps that are already popped + // but may still be used in the C arguments. 
+ self.spill_regs(); + let new_temps = self.ctx.get_reg_mapping(); // all spilled + + // Temporarily manipulate RegMappings so that we can use registers + // to pass stack operands that are already spilled above. + self.ctx.set_reg_mapping(old_temps); + + // Call a C function let out = self.next_opnd_out(Opnd::match_num_bits(&opnds)); self.push_insn(Insn::CCall { fptr, opnds, out }); + + // Registers in old_temps may be clobbered by the above C call, + // so rollback the manipulated RegMappings to a spilled version. + self.ctx.set_reg_mapping(new_temps); + + // Clear the canary after use + if let Some(canary_opnd) = canary_opnd { + self.mov(canary_opnd, 0.into()); + } + out } - pub fn cmp(&mut self, left: Opnd, right: Opnd) { - self.push_insn(Insn::Cmp { left, right }); + /// Let vm_check_canary() assert the leafness of this ccall if leaf_ccall is set + fn set_stack_canary(&mut self, opnds: &Vec<Opnd>) -> Option<Opnd> { + // Use the slot right above the stack top for verifying leafness. + let canary_opnd = self.stack_opnd(-1); + + // If the slot is already used, which is a valid optimization to avoid spills, + // give up the verification. + let canary_opnd = if cfg!(feature = "runtime_checks") && self.leaf_ccall && opnds.iter().all(|opnd| + opnd.get_reg_opnd() != canary_opnd.get_reg_opnd() + ) { + asm_comment!(self, "set stack canary"); + self.mov(canary_opnd, vm_stack_canary().into()); + Some(canary_opnd) + } else { + None + }; + + // Avoid carrying the flag to the next instruction whether we verified it or not. + self.leaf_ccall = false; + + canary_opnd } - pub fn comment(&mut self, text: &str) { - self.push_insn(Insn::Comment(text.to_string())); + pub fn cmp(&mut self, left: Opnd, right: Opnd) { + self.push_insn(Insn::Cmp { left, right }); } #[must_use] @@ -1309,8 +1824,12 @@ impl Assembler { out } - pub fn cpop_all(&mut self) { + pub fn cpop_all(&mut self, reg_mapping: RegMapping) { self.push_insn(Insn::CPopAll); + + // Re-enable ccall's RegMappings assertion disabled by cpush_all. + // cpush_all + cpop_all preserve all stack temp registers, so it's safe. + self.set_reg_mapping(reg_mapping); } pub fn cpop_into(&mut self, opnd: Opnd) { @@ -1321,8 +1840,16 @@ impl Assembler { self.push_insn(Insn::CPush(opnd)); } - pub fn cpush_all(&mut self) { + pub fn cpush_all(&mut self) -> RegMapping { self.push_insn(Insn::CPushAll); + + // Mark all temps as not being in registers. + // Temps will be marked back as being in registers by cpop_all. + // We assume that cpush_all + cpop_all are used for C functions in utils.rs + // that don't require spill_regs for GC. 
+ let mapping = self.ctx.get_reg_mapping(); + self.set_reg_mapping(RegMapping::default()); + mapping } pub fn cret(&mut self, opnd: Opnd) { @@ -1401,6 +1928,10 @@ impl Assembler { self.push_insn(Insn::Jbe(target)); } + pub fn jb(&mut self, target: Target) { + self.push_insn(Insn::Jb(target)); + } + pub fn je(&mut self, target: Target) { self.push_insn(Insn::Je(target)); } @@ -1409,6 +1940,16 @@ impl Assembler { self.push_insn(Insn::Jl(target)); } + #[allow(dead_code)] + pub fn jg(&mut self, target: Target) { + self.push_insn(Insn::Jg(target)); + } + + #[allow(dead_code)] + pub fn jge(&mut self, target: Target) { + self.push_insn(Insn::Jge(target)); + } + pub fn jmp(&mut self, target: Target) { self.push_insn(Insn::Jmp(target)); } @@ -1429,6 +1970,10 @@ impl Assembler { self.push_insn(Insn::Jo(target)); } + pub fn jo_mul(&mut self, target: Target) { + self.push_insn(Insn::JoMul(target)); + } + pub fn jz(&mut self, target: Target) { self.push_insn(Insn::Jz(target)); } @@ -1441,9 +1986,9 @@ impl Assembler { } #[must_use] - pub fn lea_label(&mut self, target: Target) -> Opnd { + pub fn lea_jump_target(&mut self, target: Target) -> Opnd { let out = self.next_opnd_out(Opnd::DEFAULT_NUM_BITS); - self.push_insn(Insn::LeaLabel { target, out }); + self.push_insn(Insn::LeaJumpTarget { target, out }); out } @@ -1462,7 +2007,10 @@ impl Assembler { } pub fn load_into(&mut self, dest: Opnd, opnd: Opnd) { - self.push_insn(Insn::LoadInto { dest, opnd }); + match (dest, opnd) { + (Opnd::Reg(dest), Opnd::Reg(opnd)) if dest == opnd => {}, // skip if noop + _ => self.push_insn(Insn::LoadInto { dest, opnd }), + } } #[must_use] @@ -1502,7 +2050,7 @@ impl Assembler { } //pub fn pos_marker<F: FnMut(CodePtr)>(&mut self, marker_fn: F) - pub fn pos_marker(&mut self, marker_fn: impl Fn(CodePtr) + 'static) { + pub fn pos_marker(&mut self, marker_fn: impl Fn(CodePtr, &CodeBlock) + 'static) { self.push_insn(Insn::PosMarker(Box::new(marker_fn))); } @@ -1524,17 +2072,35 @@ impl Assembler { out } + #[must_use] + pub fn mul(&mut self, left: Opnd, right: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); + self.push_insn(Insn::Mul { left, right, out }); + out + } + pub fn test(&mut self, left: Opnd, right: Opnd) { self.push_insn(Insn::Test { left, right }); } #[must_use] + #[allow(dead_code)] pub fn urshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd, shift])); self.push_insn(Insn::URShift { opnd, shift, out }); out } + /// Verify the leafness of the given block + pub fn with_leaf_ccall<F, R>(&mut self, mut block: F) -> R + where F: FnMut(&mut Self) -> R { + let old_leaf_ccall = self.leaf_ccall; + self.leaf_ccall = true; + let ret = block(self); + self.leaf_ccall = old_leaf_ccall; + ret + } + /// Add a label at the current position pub fn write_label(&mut self, target: Target) { assert!(target.unwrap_label_idx() < self.label_names.len()); @@ -1549,6 +2115,17 @@ impl Assembler { } } +/// Macro to use format! for Insn::Comment, which skips a format! call +/// when not dumping disassembly. +macro_rules! 
asm_comment { + ($asm:expr, $($fmt:tt)*) => { + if $crate::options::get_option_ref!(dump_disasm).is_some() { + $asm.push_insn(Insn::Comment(format!($($fmt)*))); + } + }; +} +pub(crate) use asm_comment; + #[cfg(test)] mod tests { use super::*; diff --git a/yjit/src/backend/mod.rs b/yjit/src/backend/mod.rs index 4794695094..6921244c72 100644 --- a/yjit/src/backend/mod.rs +++ b/yjit/src/backend/mod.rs @@ -4,5 +4,11 @@ pub mod x86_64; #[cfg(target_arch = "aarch64")] pub mod arm64; +#[cfg(target_arch = "x86_64")] +pub use x86_64 as current; + +#[cfg(target_arch = "aarch64")] +pub use arm64 as current; + pub mod ir; mod tests; diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index 3098c7e3b0..bfeea5163a 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -1,19 +1,19 @@ #![cfg(test)] -use crate::asm::{CodeBlock}; +use crate::asm::CodeBlock; use crate::backend::ir::*; use crate::cruby::*; use crate::utils::c_callable; #[test] fn test_add() { - let mut asm = Assembler::new(); + let mut asm = Assembler::new(0); let out = asm.add(SP, Opnd::UImm(1)); let _ = asm.add(out, Opnd::UImm(2)); } #[test] fn test_alloc_regs() { - let mut asm = Assembler::new(); + let mut asm = Assembler::new(0); // Get the first output that we're going to reuse later. let out1 = asm.add(EC, Opnd::UImm(1)); @@ -62,7 +62,7 @@ fn test_alloc_regs() { fn setup_asm() -> (Assembler, CodeBlock) { return ( - Assembler::new(), + Assembler::new(0), CodeBlock::new_dummy(1024) ); } @@ -87,7 +87,7 @@ fn test_mov_mem2mem() { let (mut asm, mut cb) = setup_asm(); - asm.comment("check that comments work too"); + asm_comment!(asm, "check that comments work too"); asm.mov(Opnd::mem(64, SP, 0), Opnd::mem(64, SP, 8)); asm.compile_with_num_regs(&mut cb, 1); @@ -194,12 +194,12 @@ fn test_c_call() #[test] fn test_alloc_ccall_regs() { - let mut asm = Assembler::new(); + let mut asm = Assembler::new(0); let out1 = asm.ccall(0 as *const u8, vec![]); let out2 = asm.ccall(0 as *const u8, vec![out1]); asm.mov(EC, out2); let mut cb = CodeBlock::new_dummy(1024); - asm.compile_with_regs(&mut cb, Assembler::get_alloc_regs()); + asm.compile_with_regs(&mut cb, None, Assembler::get_alloc_regs()); } #[test] @@ -231,10 +231,10 @@ fn test_jcc_ptr() { let (mut asm, mut cb) = setup_asm(); - let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into()); - let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK)); + let side_exit = Target::CodePtr(cb.get_write_ptr().add_bytes(4)); + let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK as i32)); asm.test( - Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG), + Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG as i32), not_mask, ); asm.jnz(side_exit); @@ -248,7 +248,7 @@ fn test_jmp_ptr() { let (mut asm, mut cb) = setup_asm(); - let stub = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into()); + let stub = Target::CodePtr(cb.get_write_ptr().add_bytes(4)); asm.jmp(stub); asm.compile_with_num_regs(&mut cb, 0); @@ -259,7 +259,7 @@ fn test_jo() { let (mut asm, mut cb) = setup_asm(); - let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into()); + let side_exit = Target::CodePtr(cb.get_write_ptr().add_bytes(4)); let arg1 = Opnd::mem(64, SP, 0); let arg0 = Opnd::mem(64, SP, 8); @@ -283,8 +283,7 @@ fn test_bake_string() { #[test] fn test_draining_iterator() { - - let mut asm = Assembler::new(); + let mut asm = Assembler::new(0); let _ = asm.load(Opnd::None); 
asm.store(Opnd::None, Opnd::None); @@ -303,25 +302,6 @@ fn test_draining_iterator() { } #[test] -fn test_lookback_iterator() { - let mut asm = Assembler::new(); - - let _ = asm.load(Opnd::None); - asm.store(Opnd::None, Opnd::None); - asm.store(Opnd::None, Opnd::None); - - let iter = asm.into_lookback_iter(); - - while let Some((index, insn)) = iter.next_unmapped() { - if index > 0 { - let opnd_iter = iter.get_previous().unwrap().opnd_iter(); - assert_eq!(opnd_iter.take(1).next(), Some(&Opnd::None)); - assert!(matches!(insn, Insn::Store { .. })); - } - } -} - -#[test] fn test_cmp_8_bit() { let (mut asm, mut cb) = setup_asm(); let reg = Assembler::get_alloc_regs()[0]; @@ -329,3 +309,21 @@ fn test_cmp_8_bit() { asm.compile_with_num_regs(&mut cb, 1); } + +#[test] +fn test_no_pos_marker_callback_when_compile_fails() { + // When compilation fails (e.g. when out of memory), the code written out is malformed. + // We don't want to invoke the pos_marker callbacks with positions of malformed code. + let mut asm = Assembler::new(0); + + // Markers around code to exhaust memory limit + let fail_if_called = |_code_ptr, _cb: &_| panic!("pos_marker callback should not be called"); + asm.pos_marker(fail_if_called); + let zero = asm.load(0.into()); + let sum = asm.add(zero, 500.into()); + asm.store(Opnd::mem(64, SP, 8), sum); + asm.pos_marker(fail_if_called); + + let cb = &mut CodeBlock::new_dummy(8); + assert!(asm.compile(cb, None).is_none(), "should fail due to tiny size limit"); +} diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 297a0fd852..ef435bca7e 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -1,15 +1,12 @@ -#![allow(dead_code)] -#![allow(unused_variables)] -#![allow(unused_imports)] - use std::mem::take; use crate::asm::*; use crate::asm::x86_64::*; -use crate::codegen::{JITState}; +use crate::codegen::CodePtr; use crate::cruby::*; use crate::backend::ir::*; -use crate::codegen::CodegenGlobals; +use crate::options::*; +use crate::utils::*; // Use the x86 register type for this platform pub type Reg = X86Reg; @@ -33,8 +30,10 @@ pub const _C_ARG_OPNDS: [Opnd; 6] = [ pub const C_RET_REG: Reg = RAX_REG; pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG); -// The number of bytes that are generated by jmp_ptr -pub const JMP_PTR_BYTES: usize = 6; +impl CodeBlock { + // The number of bytes that are generated by jmp_ptr + pub fn jmp_ptr_bytes(&self) -> usize { 5 } +} /// Map Opnd to X86Opnd impl From<Opnd> for X86Opnd { @@ -80,12 +79,16 @@ impl From<&Opnd> for X86Opnd { } } +/// List of registers that can be used for stack temps and locals. +pub static TEMP_REGS: [Reg; 5] = [RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG]; + impl Assembler { // A special scratch register for intermediate processing. - // Note: right now this is only used by LeaLabel because label_ref accepts - // a closure and we don't want it to have to capture anything. 
- const SCRATCH0: X86Opnd = X86Opnd::Reg(R11_REG); + // This register is caller-saved (so we don't have to save it before using it) + pub const SCRATCH_REG: Reg = R11_REG; + const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG); + /// Get the list of registers from which we can allocate on this platform pub fn get_alloc_regs() -> Vec<Reg> @@ -109,7 +112,7 @@ impl Assembler fn x86_split(mut self) -> Assembler { let live_ranges: Vec<usize> = take(&mut self.live_ranges); - let mut asm = Assembler::new_with_label_names(take(&mut self.label_names)); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits), self.num_locals); let mut iterator = self.into_draining_iter(); while let Some((index, mut insn)) = iterator.next_unmapped() { @@ -132,7 +135,7 @@ impl Assembler // Opnd::Value operands into registers here because: // // - Most instructions can't be encoded with 64-bit immediates. - // - We look for Op::Load specifically when emiting to keep GC'ed + // - We look for Op::Load specifically when emitting to keep GC'ed // VALUEs alive. This is a sort of canonicalization. let mut unmapped_opnds: Vec<Opnd> = vec![]; @@ -140,21 +143,23 @@ impl Assembler let mut opnd_iter = insn.opnd_iter_mut(); while let Some(opnd) = opnd_iter.next() { + if let Opnd::Stack { .. } = opnd { + *opnd = asm.lower_stack_opnd(opnd); + } unmapped_opnds.push(*opnd); - *opnd = if is_load { - iterator.map_opnd(*opnd) - } else if let Opnd::Value(value) = opnd { - // Since mov(mem64, imm32) sign extends, as_i64() makes sure - // we split when the extended value is different. - if !value.special_const_p() || imm_num_bits(value.as_i64()) > 32 { - asm.load(iterator.map_opnd(*opnd)) - } else { - Opnd::UImm(value.as_u64()) + *opnd = match opnd { + Opnd::Value(value) if !is_load => { + // Since mov(mem64, imm32) sign extends, as_i64() makes sure + // we split when the extended value is different. + if !value.special_const_p() || imm_num_bits(value.as_i64()) > 32 { + asm.load(iterator.map_opnd(*opnd)) + } else { + Opnd::UImm(value.as_u64()) + } } - } else { - iterator.map_opnd(*opnd) - } + _ => iterator.map_opnd(*opnd), + }; } // We are replacing instructions here so we know they are already @@ -163,40 +168,86 @@ impl Assembler match &mut insn { Insn::Add { left, right, out } | Insn::Sub { left, right, out } | + Insn::Mul { left, right, out } | Insn::And { left, right, out } | Insn::Or { left, right, out } | Insn::Xor { left, right, out } => { - match (unmapped_opnds[0], unmapped_opnds[1]) { - (Opnd::Mem(_), Opnd::Mem(_)) => { - *left = asm.load(*left); - *right = asm.load(*right); - }, - (Opnd::Mem(_), Opnd::UImm(_) | Opnd::Imm(_)) => { - *left = asm.load(*left); - }, - // Instruction output whose live range spans beyond this instruction - (Opnd::InsnOut { idx, .. }, _) => { - if live_ranges[idx] > index { - *left = asm.load(*left); + match (&left, &right, iterator.peek()) { + // Merge this insn, e.g. 
`add REG, right -> out`, and `mov REG, out` if possible + (Opnd::Reg(_), Opnd::UImm(value), Some(Insn::Mov { dest, src })) + if out == src && left == dest && live_ranges[index] == index + 1 && uimm_num_bits(*value) <= 32 => { + *out = *dest; + asm.push_insn(insn); + iterator.map_insn_index(&mut asm); + iterator.next_unmapped(); // Pop merged Insn::Mov + } + (Opnd::Reg(_), Opnd::Reg(_), Some(Insn::Mov { dest, src })) + if out == src && live_ranges[index] == index + 1 && { + // We want to do `dest == left`, but `left` has already gone + // through lower_stack_opnd() while `dest` has not. So we + // lower `dest` before comparing. + let lowered_dest = if let Opnd::Stack { .. } = dest { + asm.lower_stack_opnd(dest) + } else { + *dest + }; + lowered_dest == *left + } => { + *out = *dest; + asm.push_insn(insn); + iterator.map_insn_index(&mut asm); + iterator.next_unmapped(); // Pop merged Insn::Mov + } + _ => { + match (unmapped_opnds[0], unmapped_opnds[1]) { + (Opnd::Mem(_), Opnd::Mem(_)) => { + *left = asm.load(*left); + *right = asm.load(*right); + }, + (Opnd::Mem(_), Opnd::UImm(_) | Opnd::Imm(_)) => { + *left = asm.load(*left); + }, + // Instruction output whose live range spans beyond this instruction + (Opnd::InsnOut { idx, .. }, _) => { + if live_ranges[idx] > index { + *left = asm.load(*left); + } + }, + // We have to load memory operands to avoid corrupting them + (Opnd::Mem(_) | Opnd::Reg(_), _) => { + *left = asm.load(*left); + }, + _ => {} + }; + + *out = asm.next_opnd_out(Opnd::match_num_bits(&[*left, *right])); + asm.push_insn(insn); + } + } + }, + Insn::Cmp { left, right } => { + // Replace `cmp REG, 0` (4 bytes) with `test REG, REG` (3 bytes) + // when next IR is `je`, `jne`, `csel_e`, or `csel_ne` + match (&left, &right, iterator.peek()) { + (Opnd::InsnOut { .. }, + Opnd::UImm(0) | Opnd::Imm(0), + Some(Insn::Je(_) | Insn::Jne(_) | Insn::CSelE { .. } | Insn::CSelNE { .. })) => { + asm.push_insn(Insn::Test { left: *left, right: *left }); + } + _ => { + if let (Opnd::Mem(_), Opnd::Mem(_)) = (&left, &right) { + let loaded = asm.load(*right); + *right = loaded; } - }, - // We have to load memory operands to avoid corrupting them - (Opnd::Mem(_) | Opnd::Reg(_), _) => { - *left = asm.load(*left); - }, - _ => {} - }; - - *out = asm.next_opnd_out(Opnd::match_num_bits(&[*left, *right])); - asm.push_insn(insn); + asm.push_insn(insn); + } + } }, - Insn::Cmp { left, right } | Insn::Test { left, right } => { if let (Opnd::Mem(_), Opnd::Mem(_)) = (&left, &right) { let loaded = asm.load(*right); *right = loaded; } - asm.push_insn(insn); }, // These instructions modify their input operand in-place, so we @@ -237,7 +288,11 @@ impl Assembler *truthy = asm.load(*truthy); } }, - Opnd::UImm(_) | Opnd::Imm(_) | Opnd::Value(_) => { + Opnd::UImm(_) | Opnd::Imm(_) => { + *truthy = asm.load(*truthy); + }, + // Opnd::Value could have already been split + Opnd::Value(_) if !matches!(truthy, Opnd::InsnOut { .. }) => { *truthy = asm.load(*truthy); }, _ => {} @@ -253,26 +308,31 @@ impl Assembler *out = asm.next_opnd_out(Opnd::match_num_bits(&[*truthy, *falsy])); asm.push_insn(insn); }, - Insn::Mov { dest, src } => { + Insn::Mov { dest, src } | Insn::Store { dest, src } => { match (&dest, &src) { (Opnd::Mem(_), Opnd::Mem(_)) => { // We load opnd1 because for mov, opnd0 is the output let opnd1 = asm.load(*src); asm.mov(*dest, opnd1); }, - (Opnd::Mem(_), Opnd::UImm(value)) => { - // 32-bit values will be sign-extended - if imm_num_bits(*value as i64) > 32 { + (Opnd::Mem(Mem { num_bits, .. 
}), Opnd::UImm(value)) => { + // For 64 bit destinations, 32-bit values will be sign-extended + if *num_bits == 64 && imm_num_bits(*value as i64) > 32 { let opnd1 = asm.load(*src); asm.mov(*dest, opnd1); } else { asm.mov(*dest, *src); } }, - (Opnd::Mem(_), Opnd::Imm(value)) => { - if imm_num_bits(*value) > 32 { + (Opnd::Mem(Mem { num_bits, .. }), Opnd::Imm(value)) => { + // For 64 bit destinations, 32-bit values will be sign-extended + if *num_bits == 64 && imm_num_bits(*value) > 32 { let opnd1 = asm.load(*src); asm.mov(*dest, opnd1); + } else if uimm_num_bits(*value as u64) <= *num_bits { + // If the bit string is short enough for the destination, use the unsigned representation. + // Note that 64-bit and negative values are ruled out. + asm.mov(*dest, Opnd::UImm(*value as u64)); } else { asm.mov(*dest, *src); } @@ -310,13 +370,25 @@ impl Assembler // Load each operand into the corresponding argument // register. for (idx, opnd) in opnds.into_iter().enumerate() { - asm.load_into(C_ARG_OPNDS[idx], *opnd); + asm.load_into(Opnd::c_arg(C_ARG_OPNDS[idx]), *opnd); } // Now we push the CCall without any arguments so that it // just performs the call. asm.ccall(*fptr, vec![]); }, + Insn::Lea { .. } => { + // Merge `lea` and `mov` into a single `lea` when possible + match (&insn, iterator.peek()) { + (Insn::Lea { opnd, out }, Some(Insn::Mov { dest: Opnd::Reg(reg), src })) + if matches!(out, Opnd::InsnOut { .. }) && out == src && live_ranges[index] == index + 1 => { + asm.push_insn(Insn::Lea { opnd: *opnd, out: Opnd::Reg(*reg) }); + iterator.map_insn_index(&mut asm); + iterator.next_unmapped(); // Pop merged Insn::Mov + } + _ => asm.push_insn(insn), + } + }, _ => { if insn.out_opnd().is_some() { let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter()); @@ -335,7 +407,7 @@ impl Assembler } /// Emit platform-specific machine code - pub fn x86_emit(&mut self, cb: &mut CodeBlock) -> Vec<u32> + pub fn x86_emit(&mut self, cb: &mut CodeBlock, ocb: &mut Option<&mut OutlinedCb>) -> Option<Vec<u32>> { /// For some instructions, we want to be able to lower a 64-bit operand /// without requiring more registers to be available in the register @@ -365,12 +437,45 @@ impl Assembler } } + /// Compile a side exit if Target::SideExit is given. 
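+        /// Returns None if the side exit cannot be generated, so the caller can abort code emission.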
+ fn compile_side_exit( + target: Target, + asm: &mut Assembler, + ocb: &mut Option<&mut OutlinedCb>, + ) -> Option<Target> { + if let Target::SideExit { counter, context } = target { + let side_exit = asm.get_side_exit(&context.unwrap(), Some(counter), ocb.as_mut().unwrap()); + Some(Target::SideExitPtr(side_exit?)) + } else { + Some(target) + } + } + + fn emit_csel( + cb: &mut CodeBlock, + truthy: Opnd, + falsy: Opnd, + out: Opnd, + cmov_fn: fn(&mut CodeBlock, X86Opnd, X86Opnd), + cmov_neg: fn(&mut CodeBlock, X86Opnd, X86Opnd)){ + + // Assert that output is a register + out.unwrap_reg(); + + // If the truthy value is a memory operand + if let Opnd::Mem(_) = truthy { + if out != falsy { + mov(cb, out.into(), falsy.into()); + } + + cmov_fn(cb, out.into(), truthy.into()); + } else { + if out != truthy { + mov(cb, out.into(), truthy.into()); + } - fn emit_csel(cb: &mut CodeBlock, truthy: Opnd, falsy: Opnd, out: Opnd, cmov_fn: fn(&mut CodeBlock, X86Opnd, X86Opnd)) { - if out != truthy { - mov(cb, out.into(), truthy.into()); + cmov_neg(cb, out.into(), falsy.into()); } - cmov_fn(cb, out.into(), falsy.into()); } //dbg!(&self.insns); @@ -378,10 +483,13 @@ impl Assembler // List of GC offsets let mut gc_offsets: Vec<u32> = Vec::new(); + // Buffered list of PosMarker callbacks to fire if codegen is successful + let mut pos_markers: Vec<(usize, CodePtr)> = vec![]; + // For each instruction let start_write_pos = cb.get_write_pos(); - let mut insns_idx: usize = 0; - while let Some(insn) = self.insns.get(insns_idx) { + let mut insn_idx: usize = 0; + while let Some(insn) = self.insns.get(insn_idx) { let src_ptr = cb.get_write_ptr(); let had_dropped_bytes = cb.has_dropped_bytes(); let old_label_state = cb.get_label_state(); @@ -389,9 +497,7 @@ impl Assembler match insn { Insn::Comment(text) => { - if cfg!(feature = "disasm") { - cb.add_comment(text); - } + cb.add_comment(text); }, // Write the label at the current position @@ -400,8 +506,8 @@ impl Assembler }, // Report back the current position in the generated code - Insn::PosMarker(pos_marker) => { - pos_marker(cb.get_write_ptr()); + Insn::PosMarker(..) => { + pos_markers.push((insn_idx, cb.get_write_ptr())); }, Insn::BakeString(text) => { @@ -414,19 +520,37 @@ impl Assembler cb.write_byte(0); }, + // Set up RBP to work with frame pointer unwinding + // (e.g. with Linux `perf record --call-graph fp`) + Insn::FrameSetup => { + if get_option!(frame_pointer) { + push(cb, RBP); + mov(cb, RBP, RSP); + push(cb, RBP); + } + }, + Insn::FrameTeardown => { + if get_option!(frame_pointer) { + pop(cb, RBP); + pop(cb, RBP); + } + }, + Insn::Add { left, right, .. } => { let opnd1 = emit_64bit_immediate(cb, right); add(cb, left.into(), opnd1); }, - Insn::FrameSetup => {}, - Insn::FrameTeardown => {}, - Insn::Sub { left, right, .. } => { let opnd1 = emit_64bit_immediate(cb, right); sub(cb, left.into(), opnd1); }, + Insn::Mul { left, right, .. } => { + let opnd1 = emit_64bit_immediate(cb, right); + imul(cb, left.into(), opnd1); + }, + Insn::And { left, right, .. 
} => { let opnd1 = emit_64bit_immediate(cb, right); and(cb, left.into(), opnd1); @@ -490,16 +614,23 @@ impl Assembler lea(cb, out.into(), opnd.into()); }, - // Load relative address - Insn::LeaLabel { target, out } => { - let label_idx = target.unwrap_label_idx(); - - cb.label_ref(label_idx, 7, |cb, src_addr, dst_addr| { - let disp = dst_addr - src_addr; - lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap())); - }); + // Load address of jump target + Insn::LeaJumpTarget { target, out } => { + if let Target::Label(label_idx) = target { + // Set output to the raw address of the label + cb.label_ref(*label_idx, 7, |cb, src_addr, dst_addr| { + let disp = dst_addr - src_addr; + lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap())); + }); - mov(cb, out.into(), Self::SCRATCH0); + mov(cb, out.into(), Self::SCRATCH0); + } else { + // Set output to the jump target's raw address + let target_code = target.unwrap_code_ptr(); + let target_addr = target_code.raw_addr(cb).as_u64(); + // Constant encoded length important for patching + movabs(cb, out.into(), target_addr); + } }, // Push and pop to/from the C stack @@ -580,61 +711,96 @@ impl Assembler // Conditional jump to a label Insn::Jmp(target) => { - match *target { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jmp_ptr(cb, code_ptr), Target::Label(label_idx) => jmp_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } } Insn::Je(target) => { - match *target { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => je_ptr(cb, code_ptr), Target::Label(label_idx) => je_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } } Insn::Jne(target) => { - match *target { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jne_ptr(cb, code_ptr), Target::Label(label_idx) => jne_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } } Insn::Jl(target) => { - match *target { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jl_ptr(cb, code_ptr), Target::Label(label_idx) => jl_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + }, + + Insn::Jg(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jg_ptr(cb, code_ptr), + Target::Label(label_idx) => jg_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + }, + + Insn::Jge(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jge_ptr(cb, code_ptr), + Target::Label(label_idx) => jge_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } }, Insn::Jbe(target) => { - match *target { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jbe_ptr(cb, code_ptr), Target::Label(label_idx) => jbe_label(cb, label_idx), + Target::SideExit { .. 
} => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + }, + + Insn::Jb(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jb_ptr(cb, code_ptr), + Target::Label(label_idx) => jb_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } }, Insn::Jz(target) => { - match *target { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jz_ptr(cb, code_ptr), Target::Label(label_idx) => jz_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } } Insn::Jnz(target) => { - match *target { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jnz_ptr(cb, code_ptr), Target::Label(label_idx) => jnz_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } } - Insn::Jo(target) => { - match *target { + Insn::Jo(target) | + Insn::JoMul(target) => { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jo_ptr(cb, code_ptr), Target::Label(label_idx) => jo_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } } + Insn::Joz(..) | Insn::Jonz(..) => unreachable!("Joz/Jonz should be unused for now"), + // Atomically increment a counter at a given memory location Insn::IncrCounter { mem, value } => { assert!(matches!(mem, Opnd::Mem(_))); @@ -646,43 +812,36 @@ impl Assembler Insn::Breakpoint => int3(cb), Insn::CSelZ { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovnz); + emit_csel(cb, *truthy, *falsy, *out, cmovz, cmovnz); }, Insn::CSelNZ { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovz); + emit_csel(cb, *truthy, *falsy, *out, cmovnz, cmovz); }, Insn::CSelE { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovne); + emit_csel(cb, *truthy, *falsy, *out, cmove, cmovne); }, Insn::CSelNE { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmove); + emit_csel(cb, *truthy, *falsy, *out, cmovne, cmove); }, Insn::CSelL { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovge); + emit_csel(cb, *truthy, *falsy, *out, cmovl, cmovge); }, Insn::CSelLE { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovg); + emit_csel(cb, *truthy, *falsy, *out, cmovle, cmovg); }, Insn::CSelG { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovle); + emit_csel(cb, *truthy, *falsy, *out, cmovg, cmovle); }, Insn::CSelGE { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovl); + emit_csel(cb, *truthy, *falsy, *out, cmovge, cmovl); } Insn::LiveReg { .. } => (), // just a reg alloc signal, no code Insn::PadInvalPatch => { let code_size = cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos())); - if code_size < JMP_PTR_BYTES { - nop(cb, (JMP_PTR_BYTES - code_size) as u32); + if code_size < cb.jmp_ptr_bytes() { + nop(cb, (cb.jmp_ptr_bytes() - code_size) as u32); } } - - // We want to keep the panic here because some instructions that - // we feed to the backend could get lowered into other - // instructions. 
So it's possible that some of our backend - // instructions can never make it to the emit stage. - #[allow(unreachable_patterns)] - _ => panic!("unsupported instruction passed to x86 backend: {:?}", insn) }; // On failure, jump to the next page and retry the current insn @@ -690,18 +849,32 @@ impl Assembler // Reset cb states before retrying the current Insn cb.set_label_state(old_label_state); } else { - insns_idx += 1; + insn_idx += 1; gc_offsets.append(&mut insn_gc_offsets); } } - gc_offsets + // Error if we couldn't write out everything + if cb.has_dropped_bytes() { + return None + } else { + // No bytes dropped, so the pos markers point to valid code + for (insn_idx, pos) in pos_markers { + if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() { + callback(pos, &cb); + } else { + panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}"); + } + } + + return Some(gc_offsets) + } } /// Optimize and compile the stored instructions - pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>) -> Vec<u32> - { - let mut asm = self.x86_split().alloc_regs(regs); + pub fn compile_with_regs(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>, regs: Vec<Reg>) -> Option<(CodePtr, Vec<u32>)> { + let asm = self.x86_split(); + let mut asm = asm.alloc_regs(regs); // Create label instances in the code block for (idx, name) in asm.label_names.iter().enumerate() { @@ -709,24 +882,32 @@ impl Assembler assert!(label_idx == idx); } - let gc_offsets = asm.x86_emit(cb); + let mut ocb = ocb; // for &mut + let start_ptr = cb.get_write_ptr(); + let gc_offsets = asm.x86_emit(cb, &mut ocb); - if cb.has_dropped_bytes() { - cb.clear_labels(); - } else { + if let (Some(gc_offsets), false) = (gc_offsets, cb.has_dropped_bytes()) { cb.link_labels(); - } - gc_offsets + Some((start_ptr, gc_offsets)) + } else { + cb.clear_labels(); + + None + } } } #[cfg(test)] mod tests { + use crate::disasm::assert_disasm; + #[cfg(feature = "disasm")] + use crate::disasm::{unindent, disasm_addr_range}; + use super::*; fn setup_asm() -> (Assembler, CodeBlock) { - (Assembler::new(), CodeBlock::new_dummy(1024)) + (Assembler::new(0), CodeBlock::new_dummy(1024)) } #[test] @@ -892,4 +1073,268 @@ mod tests { assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c31d8"); } + + #[test] + fn test_merge_lea_reg() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.lea(Opnd::mem(64, SP, 8)); + asm.mov(SP, sp); // should be merged to lea + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "488d5b08", {" + 0x0: lea rbx, [rbx + 8] + "}); + } + + #[test] + fn test_merge_lea_mem() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.lea(Opnd::mem(64, SP, 8)); + asm.mov(Opnd::mem(64, SP, 0), sp); // should NOT be merged to lea + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "488d4308488903", {" + 0x0: lea rax, [rbx + 8] + 0x4: mov qword ptr [rbx], rax + "}); + } + + #[test] + fn test_replace_cmp_0() { + let (mut asm, mut cb) = setup_asm(); + + let val = asm.load(Opnd::mem(64, SP, 8)); + asm.cmp(val, 0.into()); + let result = asm.csel_e(Qtrue.into(), Qfalse.into()); + asm.mov(Opnd::Reg(RAX_REG), result); + asm.compile_with_num_regs(&mut cb, 2); + + assert_eq!(format!("{:x}", cb), "488b43084885c0b814000000b900000000480f45c14889c0"); + } + + #[test] + fn test_merge_add_mov() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.add(CFP, Opnd::UImm(0x40)); + asm.mov(CFP, sp); // should be merged to add + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", 
cb), "4983c540"); + } + + #[test] + fn test_merge_sub_mov() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.sub(CFP, Opnd::UImm(0x40)); + asm.mov(CFP, sp); // should be merged to add + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4983ed40"); + } + + #[test] + fn test_merge_and_mov() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.and(CFP, Opnd::UImm(0x40)); + asm.mov(CFP, sp); // should be merged to add + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4983e540"); + } + + #[test] + fn test_merge_or_mov() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.or(CFP, Opnd::UImm(0x40)); + asm.mov(CFP, sp); // should be merged to add + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4983cd40"); + } + + #[test] + fn test_merge_xor_mov() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.xor(CFP, Opnd::UImm(0x40)); + asm.mov(CFP, sp); // should be merged to add + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4983f540"); + } + + #[test] + fn test_reorder_c_args_no_cycle() { + let (mut asm, mut cb) = setup_asm(); + + asm.ccall(0 as _, vec![ + C_ARG_OPNDS[0], // mov rdi, rdi (optimized away) + C_ARG_OPNDS[1], // mov rsi, rsi (optimized away) + ]); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm!(cb, "b800000000ffd0", {" + 0x0: mov eax, 0 + 0x5: call rax + "}); + } + + #[test] + fn test_reorder_c_args_single_cycle() { + let (mut asm, mut cb) = setup_asm(); + + // rdi and rsi form a cycle + asm.ccall(0 as _, vec![ + C_ARG_OPNDS[1], // mov rdi, rsi + C_ARG_OPNDS[0], // mov rsi, rdi + C_ARG_OPNDS[2], // mov rdx, rdx (optimized away) + ]); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm!(cb, "4989f34889fe4c89dfb800000000ffd0", {" + 0x0: mov r11, rsi + 0x3: mov rsi, rdi + 0x6: mov rdi, r11 + 0x9: mov eax, 0 + 0xe: call rax + "}); + } + + #[test] + fn test_reorder_c_args_two_cycles() { + let (mut asm, mut cb) = setup_asm(); + + // rdi and rsi form a cycle, and rdx and rcx form another cycle + asm.ccall(0 as _, vec![ + C_ARG_OPNDS[1], // mov rdi, rsi + C_ARG_OPNDS[0], // mov rsi, rdi + C_ARG_OPNDS[3], // mov rdx, rcx + C_ARG_OPNDS[2], // mov rcx, rdx + ]); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm!(cb, "4989f34889fe4c89df4989cb4889d14c89dab800000000ffd0", {" + 0x0: mov r11, rsi + 0x3: mov rsi, rdi + 0x6: mov rdi, r11 + 0x9: mov r11, rcx + 0xc: mov rcx, rdx + 0xf: mov rdx, r11 + 0x12: mov eax, 0 + 0x17: call rax + "}); + } + + #[test] + fn test_reorder_c_args_large_cycle() { + let (mut asm, mut cb) = setup_asm(); + + // rdi, rsi, and rdx form a cycle + asm.ccall(0 as _, vec![ + C_ARG_OPNDS[1], // mov rdi, rsi + C_ARG_OPNDS[2], // mov rsi, rdx + C_ARG_OPNDS[0], // mov rdx, rdi + ]); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm!(cb, "4989f34889d64889fa4c89dfb800000000ffd0", {" + 0x0: mov r11, rsi + 0x3: mov rsi, rdx + 0x6: mov rdx, rdi + 0x9: mov rdi, r11 + 0xc: mov eax, 0 + 0x11: call rax + "}); + } + + #[test] + fn test_reorder_c_args_with_insn_out() { + let (mut asm, mut cb) = setup_asm(); + + let rax = asm.load(Opnd::UImm(1)); + let rcx = asm.load(Opnd::UImm(2)); + let rdx = asm.load(Opnd::UImm(3)); + // rcx and rdx form a cycle + asm.ccall(0 as _, vec![ + rax, // mov rdi, rax + rcx, // mov rsi, rcx + rcx, // mov rdx, rcx + rdx, // mov rcx, rdx + ]); + asm.compile_with_num_regs(&mut cb, 3); + + assert_disasm!(cb, "b801000000b902000000ba030000004889c74889ce4989cb4889d14c89dab800000000ffd0", {" + 0x0: mov eax, 
1 + 0x5: mov ecx, 2 + 0xa: mov edx, 3 + 0xf: mov rdi, rax + 0x12: mov rsi, rcx + 0x15: mov r11, rcx + 0x18: mov rcx, rdx + 0x1b: mov rdx, r11 + 0x1e: mov eax, 0 + 0x23: call rax + "}); + } + + #[test] + fn test_cmov_mem() { + let (mut asm, mut cb) = setup_asm(); + + let top = Opnd::mem(64, SP, 0); + let ary_opnd = SP; + let array_len_opnd = Opnd::mem(64, SP, 16); + + asm.cmp(array_len_opnd, 1.into()); + let elem_opnd = asm.csel_g(Opnd::mem(64, ary_opnd, 0), Qnil.into()); + asm.mov(top, elem_opnd); + + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "48837b1001b804000000480f4f03488903", {" + 0x0: cmp qword ptr [rbx + 0x10], 1 + 0x5: mov eax, 4 + 0xa: cmovg rax, qword ptr [rbx] + 0xe: mov qword ptr [rbx], rax + "}); + } + + #[test] + fn test_csel_split() { + let (mut asm, mut cb) = setup_asm(); + + let stack_top = Opnd::mem(64, SP, 0); + let elem_opnd = asm.csel_ne(VALUE(0x7f22c88d1930).into(), Qnil.into()); + asm.mov(stack_top, elem_opnd); + + asm.compile_with_num_regs(&mut cb, 3); + + assert_disasm!(cb, "48b830198dc8227f0000b904000000480f44c1488903", {" + 0x0: movabs rax, 0x7f22c88d1930 + 0xa: mov ecx, 4 + 0xf: cmove rax, rcx + 0x13: mov qword ptr [rbx], rax + "}); + } + + #[test] + fn test_mov_m32_imm32() { + let (mut asm, mut cb) = setup_asm(); + + let shape_opnd = Opnd::mem(32, C_RET_OPND, 0); + asm.mov(shape_opnd, Opnd::UImm(0x8000_0001)); + asm.mov(shape_opnd, Opnd::Imm(0x8000_0001)); + + asm.compile_with_num_regs(&mut cb, 0); + assert_disasm!(cb, "c70001000080c70001000080", {" + 0x0: mov dword ptr [rax], 0x80000001 + 0x6: mov dword ptr [rax], 0x80000001 + "}); + } } diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 5b8b1a1ff9..0fbca85716 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -3,6 +3,7 @@ use crate::asm::*; use crate::backend::ir::*; +use crate::backend::current::TEMP_REGS; use crate::core::*; use crate::cruby::*; use crate::invariants::*; @@ -12,12 +13,17 @@ use crate::utils::*; use CodegenStatus::*; use YARVOpnd::*; +use std::cell::Cell; use std::cmp; +use std::cmp::min; use std::collections::HashMap; +use std::ffi::c_void; use std::ffi::CStr; -use std::mem::{self, size_of}; -use std::os::raw::{c_int, c_uint}; +use std::mem; +use std::os::raw::c_int; use std::ptr; +use std::rc::Rc; +use std::cell::RefCell; use std::slice; pub use crate::virtualmem::CodePtr; @@ -26,199 +32,530 @@ pub use crate::virtualmem::CodePtr; #[derive(PartialEq, Debug)] enum CodegenStatus { KeepCompiling, - CantCompile, EndBlock, } /// Code generation function signature type InsnGenFn = fn( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus; +) -> Option<CodegenStatus>; + +/// Ephemeral code generation state. +/// Represents a [crate::core::Block] while we build it. 
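+/// The lifetime parameter ties this state to the OutlinedCb it borrows for stubs and exits.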
+pub struct JITState<'a> { + /// Instruction sequence for the compiling block + pub iseq: IseqPtr, + + /// The iseq index of the first instruction in the block + starting_insn_idx: IseqIdx, -/// Code generation state -/// This struct only lives while code is being generated -pub struct JITState { - // Block version being compiled - block: BlockRef, + /// The [Context] entering into the first instruction of the block + starting_ctx: Context, - // Instruction sequence this is associated with - iseq: IseqPtr, + /// The placement for the machine code of the [Block] + output_ptr: CodePtr, - // Index of the current instruction being compiled - insn_idx: u32, + /// Index of the current instruction being compiled + insn_idx: IseqIdx, - // Opcode for the instruction being compiled + /// Opcode for the instruction being compiled opcode: usize, - // PC of the instruction being compiled + /// PC of the instruction being compiled pc: *mut VALUE, - // Side exit to the instruction being compiled. See :side-exit:. - side_exit_for_pc: Option<CodePtr>, + /// stack_size when it started to compile the current instruction. + stack_size_for_pc: u8, + + /// Execution context when compilation started + /// This allows us to peek at run-time values + ec: EcPtr, + + /// The code block used for stubs, exits, and other code that are + /// not on the hot path. + outlined_code_block: &'a mut OutlinedCb, + + /// The outgoing branches the block will have + pub pending_outgoing: Vec<PendingBranchRef>, + + // --- Fields for block invalidation and invariants tracking below: + // Public mostly so into_block defined in the sibling module core + // can partially move out of Self. + + /// Whether we need to record the code address at + /// the end of this bytecode instruction for global invalidation + pub record_boundary_patch_point: bool, + + /// Code for immediately exiting upon entry to the block. + /// Required for invalidation. + pub block_entry_exit: Option<CodePtr>, + + /// A list of callable method entries that must be valid for the block to be valid. + pub method_lookup_assumptions: Vec<CmePtr>, - // Execution context when compilation started - // This allows us to peek at run-time values - ec: Option<EcPtr>, + /// A list of basic operators that not be redefined for the block to be valid. + pub bop_assumptions: Vec<(RedefinitionFlag, ruby_basic_operators)>, - // Whether we need to record the code address at - // the end of this bytecode instruction for global invalidation - record_boundary_patch_point: bool, + /// A list of constant expression path segments that must have + /// not been written to for the block to be valid. + pub stable_constant_names_assumption: Option<*const ID>, + + /// A list of classes that are not supposed to have a singleton class. + pub no_singleton_class_assumptions: Vec<VALUE>, + + /// When true, the block is valid only when base pointer is equal to environment pointer. + pub no_ep_escape: bool, + + /// When true, the block is valid only when there is a total of one ractor running + pub block_assumes_single_ractor: bool, + + /// Address range for Linux perf's [JIT interface](https://github.com/torvalds/linux/blob/master/tools/perf/Documentation/jit-interface.txt) + perf_map: Rc::<RefCell::<Vec<(CodePtr, Option<CodePtr>, String)>>>, + + /// Stack of symbol names for --yjit-perf + perf_stack: Vec<String>, + + /// When true, this block is the first block compiled by gen_block_series(). + first_block: bool, + + /// A killswitch for bailing out of compilation. 
Used in rare situations where we need to fail + /// compilation deep in the stack (e.g. codegen failed for some jump target, but not due to + /// OOM). Because these situations are so rare it's not worth it to check and propogate at each + /// site. Instead, we check this once at the end. + block_abandoned: bool, } -impl JITState { - pub fn new(blockref: &BlockRef) -> Self { +impl<'a> JITState<'a> { + pub fn new(blockid: BlockId, starting_ctx: Context, output_ptr: CodePtr, ec: EcPtr, ocb: &'a mut OutlinedCb, first_block: bool) -> Self { JITState { - block: blockref.clone(), - iseq: ptr::null(), // TODO: initialize this from the blockid + iseq: blockid.iseq, + starting_insn_idx: blockid.idx, + starting_ctx, + output_ptr, insn_idx: 0, opcode: 0, pc: ptr::null_mut::<VALUE>(), - side_exit_for_pc: None, - ec: None, + stack_size_for_pc: starting_ctx.get_stack_size(), + pending_outgoing: vec![], + ec, + outlined_code_block: ocb, record_boundary_patch_point: false, + block_entry_exit: None, + method_lookup_assumptions: vec![], + bop_assumptions: vec![], + stable_constant_names_assumption: None, + no_singleton_class_assumptions: vec![], + no_ep_escape: false, + block_assumes_single_ractor: false, + perf_map: Rc::default(), + perf_stack: vec![], + first_block, + block_abandoned: false, } } - pub fn get_block(&self) -> BlockRef { - self.block.clone() - } - - pub fn get_insn_idx(&self) -> u32 { + pub fn get_insn_idx(&self) -> IseqIdx { self.insn_idx } - pub fn get_iseq(self: &JITState) -> IseqPtr { + pub fn get_iseq(&self) -> IseqPtr { self.iseq } - pub fn get_opcode(self: &JITState) -> usize { + pub fn get_opcode(&self) -> usize { self.opcode } - pub fn get_pc(self: &JITState) -> *mut VALUE { + pub fn get_pc(&self) -> *mut VALUE { self.pc } -} -use crate::codegen::JCCKinds::*; + pub fn get_starting_insn_idx(&self) -> IseqIdx { + self.starting_insn_idx + } -#[allow(non_camel_case_types, unused)] -pub enum JCCKinds { - JCC_JNE, - JCC_JNZ, - JCC_JZ, - JCC_JE, - JCC_JBE, - JCC_JNA, -} + pub fn get_block_entry_exit(&self) -> Option<CodePtr> { + self.block_entry_exit + } -pub fn jit_get_arg(jit: &JITState, arg_idx: isize) -> VALUE { - // insn_len require non-test config - #[cfg(not(test))] - assert!(insn_len(jit.get_opcode()) > (arg_idx + 1).try_into().unwrap()); - unsafe { *(jit.pc.offset(arg_idx + 1)) } -} + pub fn get_starting_ctx(&self) -> Context { + self.starting_ctx + } -// Get the index of the next instruction -fn jit_next_insn_idx(jit: &JITState) -> u32 { - jit.insn_idx + insn_len(jit.get_opcode()) -} + pub fn get_arg(&self, arg_idx: isize) -> VALUE { + // insn_len require non-test config + #[cfg(not(test))] + assert!(insn_len(self.get_opcode()) > (arg_idx + 1).try_into().unwrap()); + unsafe { *(self.pc.offset(arg_idx + 1)) } + } -// Check if we are compiling the instruction at the stub PC -// Meaning we are compiling the instruction that is next to execute -fn jit_at_current_insn(jit: &JITState) -> bool { - let ec_pc: *mut VALUE = unsafe { get_cfp_pc(get_ec_cfp(jit.ec.unwrap())) }; - ec_pc == jit.pc -} + /// Get [Self::outlined_code_block] + pub fn get_ocb(&mut self) -> &mut OutlinedCb { + self.outlined_code_block + } + + /// Leave a code stub to re-enter the compiler at runtime when the compiling program point is + /// reached. Should always be used in tail position like `return jit.defer_compilation(asm);`. 
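+    /// Returns Some(EndBlock) so that the current block ends at this instruction.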
+ #[must_use] + fn defer_compilation(&mut self, asm: &mut Assembler) -> Option<CodegenStatus> { + if crate::core::defer_compilation(self, asm).is_err() { + // If we can't leave a stub, the block isn't usable and we have to bail. + self.block_abandoned = true; + } + Some(EndBlock) + } + + /// Generate a branch with either end possibly stubbed out + fn gen_branch( + &mut self, + asm: &mut Assembler, + target0: BlockId, + ctx0: &Context, + target1: Option<BlockId>, + ctx1: Option<&Context>, + gen_fn: BranchGenFn, + ) { + if crate::core::gen_branch(self, asm, target0, ctx0, target1, ctx1, gen_fn).is_none() { + // If we can't meet the request for a branch, the code is + // essentially corrupt and we have to discard the block. + self.block_abandoned = true; + } + } -// Peek at the nth topmost value on the Ruby stack. -// Returns the topmost value when n == 0. -fn jit_peek_at_stack(jit: &JITState, ctx: &Context, n: isize) -> VALUE { - assert!(jit_at_current_insn(jit)); - assert!(n < ctx.get_stack_size() as isize); + /// Wrapper for [self::gen_outlined_exit] with error handling. + fn gen_outlined_exit(&mut self, exit_pc: *mut VALUE, ctx: &Context) -> Option<CodePtr> { + let result = gen_outlined_exit(exit_pc, self.num_locals(), ctx, self.get_ocb()); + if result.is_none() { + // When we can't have the exits, the code is incomplete and we have to bail. + self.block_abandoned = true; + } - // Note: this does not account for ctx->sp_offset because - // this is only available when hitting a stub, and while - // hitting a stub, cfp->sp needs to be up to date in case - // codegen functions trigger GC. See :stub-sp-flush:. - return unsafe { - let sp: *mut VALUE = get_cfp_sp(get_ec_cfp(jit.ec.unwrap())); + result + } + + /// Return true if the current ISEQ could escape an environment. + /// + /// As of vm_push_frame(), EP is always equal to BP. However, after pushing + /// a frame, some ISEQ setups call vm_bind_update_env(), which redirects EP. + /// Also, some method calls escape the environment to the heap. + fn escapes_ep(&self) -> bool { + match unsafe { get_iseq_body_type(self.iseq) } { + // <main> frame is always associated to TOPLEVEL_BINDING. + ISEQ_TYPE_MAIN | + // Kernel#eval uses a heap EP when a Binding argument is not nil. + ISEQ_TYPE_EVAL => true, + // If this ISEQ has previously escaped EP, give up the optimization. + _ if iseq_escapes_ep(self.iseq) => true, + _ => false, + } + } - *(sp.offset(-1 - n)) - }; -} + // Get the index of the next instruction + fn next_insn_idx(&self) -> u16 { + self.insn_idx + insn_len(self.get_opcode()) as u16 + } -fn jit_peek_at_self(jit: &JITState) -> VALUE { - unsafe { get_cfp_self(get_ec_cfp(jit.ec.unwrap())) } -} + /// Get the index of the next instruction of the next instruction + fn next_next_insn_idx(&self) -> u16 { + let next_pc = unsafe { rb_iseq_pc_at_idx(self.iseq, self.next_insn_idx().into()) }; + let next_opcode: usize = unsafe { rb_iseq_opcode_at_pc(self.iseq, next_pc) }.try_into().unwrap(); + self.next_insn_idx() + insn_len(next_opcode) as u16 + } + + // Check if we are compiling the instruction at the stub PC with the target Context + // Meaning we are compiling the instruction that is next to execute + pub fn at_compile_target(&self) -> bool { + // If this is not the first block compiled by gen_block_series(), + // it might be compiling the same block again with a different Context. + // In that case, it should defer_compilation() and inspect the stack there. 
+ if !self.first_block { + return false; + } -fn jit_peek_at_local(jit: &JITState, n: i32) -> VALUE { - assert!(jit_at_current_insn(jit)); + let ec_pc: *mut VALUE = unsafe { get_cfp_pc(self.get_cfp()) }; + ec_pc == self.pc + } - let local_table_size: isize = unsafe { get_iseq_body_local_table_size(jit.iseq) } - .try_into() - .unwrap(); - assert!(n < local_table_size.try_into().unwrap()); + // Peek at the nth topmost value on the Ruby stack. + // Returns the topmost value when n == 0. + pub fn peek_at_stack(&self, ctx: &Context, n: isize) -> VALUE { + assert!(self.at_compile_target()); + assert!(n < ctx.get_stack_size() as isize); - unsafe { - let ep = get_cfp_ep(get_ec_cfp(jit.ec.unwrap())); - let n_isize: isize = n.try_into().unwrap(); - let offs: isize = -(VM_ENV_DATA_SIZE as isize) - local_table_size + n_isize + 1; - *ep.offset(offs) + // Note: this does not account for ctx->sp_offset because + // this is only available when hitting a stub, and while + // hitting a stub, cfp->sp needs to be up to date in case + // codegen functions trigger GC. See :stub-sp-flush:. + return unsafe { + let sp: *mut VALUE = get_cfp_sp(self.get_cfp()); + + *(sp.offset(-1 - n)) + }; } -} -fn jit_peek_at_block_handler(jit: &JITState, level: u32) -> VALUE { - assert!(jit_at_current_insn(jit)); + fn peek_at_self(&self) -> VALUE { + unsafe { get_cfp_self(self.get_cfp()) } + } - unsafe { - let ep = get_cfp_ep_level(get_ec_cfp(jit.ec.unwrap()), level); - *ep.offset(VM_ENV_DATA_INDEX_SPECVAL as isize) + fn peek_at_local(&self, n: i32) -> VALUE { + assert!(self.at_compile_target()); + + let local_table_size: isize = unsafe { get_iseq_body_local_table_size(self.iseq) } + .try_into() + .unwrap(); + assert!(n < local_table_size.try_into().unwrap()); + + unsafe { + let ep = get_cfp_ep(self.get_cfp()); + let n_isize: isize = n.try_into().unwrap(); + let offs: isize = -(VM_ENV_DATA_SIZE as isize) - local_table_size + n_isize + 1; + *ep.offset(offs) + } + } + + fn peek_at_block_handler(&self, level: u32) -> VALUE { + assert!(self.at_compile_target()); + + unsafe { + let ep = get_cfp_ep_level(self.get_cfp(), level); + *ep.offset(VM_ENV_DATA_INDEX_SPECVAL as isize) + } + } + + pub fn assume_expected_cfunc( + &mut self, + asm: &mut Assembler, + class: VALUE, + method: ID, + cfunc: *mut c_void, + ) -> bool { + let cme = unsafe { rb_callable_method_entry(class, method) }; + + if cme.is_null() { + return false; + } + + let def_type = unsafe { get_cme_def_type(cme) }; + if def_type != VM_METHOD_TYPE_CFUNC { + return false; + } + if unsafe { get_mct_func(get_cme_def_body_cfunc(cme)) } != cfunc { + return false; + } + + self.assume_method_lookup_stable(asm, cme); + + true + } + + pub fn assume_method_lookup_stable(&mut self, asm: &mut Assembler, cme: CmePtr) -> Option<()> { + jit_ensure_block_entry_exit(self, asm)?; + self.method_lookup_assumptions.push(cme); + + Some(()) + } + + /// Assume that objects of a given class will have no singleton class. + /// Return true if there has been no such singleton class since boot + /// and we can safely invalidate it. + pub fn assume_no_singleton_class(&mut self, asm: &mut Assembler, klass: VALUE) -> bool { + if jit_ensure_block_entry_exit(self, asm).is_none() { + return false; // out of space, give up + } + if has_singleton_class_of(klass) { + return false; // we've seen a singleton class. disable the optimization to avoid an invalidation loop. 
+ } + self.no_singleton_class_assumptions.push(klass); + true + } + + /// Assume that base pointer is equal to environment pointer in the current ISEQ. + /// Return true if it's safe to assume so. + fn assume_no_ep_escape(&mut self, asm: &mut Assembler) -> bool { + if jit_ensure_block_entry_exit(self, asm).is_none() { + return false; // out of space, give up + } + if self.escapes_ep() { + return false; // EP has been escaped in this ISEQ. disable the optimization to avoid an invalidation loop. + } + self.no_ep_escape = true; + true + } + + fn get_cfp(&self) -> *mut rb_control_frame_struct { + unsafe { get_ec_cfp(self.ec) } + } + + pub fn assume_stable_constant_names(&mut self, asm: &mut Assembler, id: *const ID) -> Option<()> { + jit_ensure_block_entry_exit(self, asm)?; + self.stable_constant_names_assumption = Some(id); + + Some(()) + } + + pub fn queue_outgoing_branch(&mut self, branch: PendingBranchRef) { + self.pending_outgoing.push(branch) + } + + /// Push a symbol for --yjit-perf + fn perf_symbol_push(&mut self, asm: &mut Assembler, symbol_name: &str) { + if !self.perf_stack.is_empty() { + self.perf_symbol_range_end(asm); + } + self.perf_stack.push(symbol_name.to_string()); + self.perf_symbol_range_start(asm, symbol_name); + } + + /// Pop the stack-top symbol for --yjit-perf + fn perf_symbol_pop(&mut self, asm: &mut Assembler) { + self.perf_symbol_range_end(asm); + self.perf_stack.pop(); + if let Some(symbol_name) = self.perf_stack.get(0) { + self.perf_symbol_range_start(asm, symbol_name); + } + } + + /// Mark the start address of a symbol to be reported to perf + fn perf_symbol_range_start(&self, asm: &mut Assembler, symbol_name: &str) { + let symbol_name = format!("[JIT] {}", symbol_name); + let syms = self.perf_map.clone(); + asm.pos_marker(move |start, _| syms.borrow_mut().push((start, None, symbol_name.clone()))); + } + + /// Mark the end address of a symbol to be reported to perf + fn perf_symbol_range_end(&self, asm: &mut Assembler) { + let syms = self.perf_map.clone(); + asm.pos_marker(move |end, _| { + if let Some((_, ref mut end_store, _)) = syms.borrow_mut().last_mut() { + assert_eq!(None, *end_store); + *end_store = Some(end); + } + }); + } + + /// Flush addresses and symbols to /tmp/perf-{pid}.map + fn flush_perf_symbols(&self, cb: &CodeBlock) { + assert_eq!(0, self.perf_stack.len()); + let path = format!("/tmp/perf-{}.map", std::process::id()); + let mut f = std::io::BufWriter::new(std::fs::File::options().create(true).append(true).open(path).unwrap()); + for sym in self.perf_map.borrow().iter() { + if let (start, Some(end), name) = sym { + // In case the code straddles two pages, part of it belongs to the symbol. + for (inline_start, inline_end) in cb.writable_addrs(*start, *end) { + use std::io::Write; + let code_size = inline_end - inline_start; + writeln!(f, "{inline_start:x} {code_size:x} {name}").unwrap(); + } + } + } + } + + /// Return true if we're compiling a send-like instruction, not an opt_* instruction. + pub fn is_sendish(&self) -> bool { + match unsafe { rb_iseq_opcode_at_pc(self.iseq, self.pc) } as u32 { + YARVINSN_send | + YARVINSN_opt_send_without_block | + YARVINSN_invokesuper => true, + _ => false, + } + } + + /// Return the number of locals in the current ISEQ + pub fn num_locals(&self) -> u32 { + unsafe { get_iseq_body_local_table_size(self.iseq) } } } -macro_rules! 
gen_counter_incr { - ($asm:tt, $counter_name:ident) => { - if (get_option!(gen_stats)) { - // Get a pointer to the counter variable - let ptr = ptr_to_counter!($counter_name); +/// Macro to call jit.perf_symbol_push() without evaluating arguments when +/// the option is turned off, which is useful for avoiding string allocation. +macro_rules! jit_perf_symbol_push { + ($jit:expr, $asm:expr, $symbol_name:expr, $perf_map:expr) => { + if get_option!(perf_map) == Some($perf_map) { + $jit.perf_symbol_push($asm, $symbol_name); + } + }; +} - // Load the pointer into a register - $asm.comment(&format!("increment counter {}", stringify!($counter_name))); - let ptr_reg = $asm.load(Opnd::const_ptr(ptr as *const u8)); - let counter_opnd = Opnd::mem(64, ptr_reg, 0); +/// Macro to call jit.perf_symbol_pop(), for consistency with jit_perf_symbol_push!(). +macro_rules! jit_perf_symbol_pop { + ($jit:expr, $asm:expr, $perf_map:expr) => { + if get_option!(perf_map) == Some($perf_map) { + $jit.perf_symbol_pop($asm); + } + }; +} - // Increment and store the updated value - $asm.incr_counter(counter_opnd, Opnd::UImm(1)); +/// Macro to push and pop a perf symbol around a function call. +macro_rules! perf_call { + // perf_call!("prefix: ", func(...)) uses "prefix: func" as a symbol. + ($prefix:expr, $func_name:ident($jit:expr, $asm:expr$(, $arg:expr)*$(,)?) ) => { + { + jit_perf_symbol_push!($jit, $asm, &format!("{}{}", $prefix, stringify!($func_name)), PerfMap::Codegen); + let ret = $func_name($jit, $asm, $($arg),*); + jit_perf_symbol_pop!($jit, $asm, PerfMap::Codegen); + ret } }; + // perf_call! { func(...) } uses "func" as a symbol. + { $func_name:ident($jit:expr, $asm:expr$(, $arg:expr)*$(,)?) } => { + perf_call!("", $func_name($jit, $asm, $($arg),*)) + }; } -macro_rules! counted_exit { - ($ocb:tt, $existing_side_exit:tt, $counter_name:ident) => { - // The counter is only incremented when stats are enabled - if (!get_option!(gen_stats)) { - $existing_side_exit - } else { - let ocb = $ocb.unwrap(); - let code_ptr = ocb.get_write_ptr(); +use crate::codegen::JCCKinds::*; +use crate::log::Log; - let mut ocb_asm = Assembler::new(); +#[allow(non_camel_case_types, unused)] +pub enum JCCKinds { + JCC_JNE, + JCC_JNZ, + JCC_JZ, + JCC_JE, + JCC_JB, + JCC_JBE, + JCC_JNA, + JCC_JNAE, + JCC_JO_MUL, +} - // Increment the counter - gen_counter_incr!(ocb_asm, $counter_name); +/// Generate code to increment a given counter. With --yjit-trace-exits=counter, +/// the counter is traced when it's incremented by this function. +#[inline(always)] +fn gen_counter_incr(jit: &JITState, asm: &mut Assembler, counter: Counter) { + gen_counter_incr_with_pc(asm, counter, jit.pc); +} - // Jump to the existing side exit - ocb_asm.jmp($existing_side_exit); - ocb_asm.compile(ocb); +/// Same as gen_counter_incr(), but takes PC isntead of JITState. +#[inline(always)] +fn gen_counter_incr_with_pc(asm: &mut Assembler, counter: Counter, pc: *mut VALUE) { + gen_counter_incr_without_pc(asm, counter); - // Pointer to the side-exit code - code_ptr.as_side_exit() - } - }; + // Trace a counter if --yjit-trace-exits=counter is given. + // TraceExits::All is handled by gen_exit(). + if get_option!(trace_exits) == Some(TraceExits::Counter(counter)) { + with_caller_saved_temp_regs(asm, |asm| { + asm.ccall(rb_yjit_record_exit_stack as *const u8, vec![Opnd::const_ptr(pc as *const u8)]); + }); + } +} + +/// Generate code to increment a given counter. Not traced by --yjit-trace-exits=counter +/// unlike gen_counter_incr() or gen_counter_incr_with_pc(). 
+#[inline(always)] +fn gen_counter_incr_without_pc(asm: &mut Assembler, counter: Counter) { + // Assert that default counters are not incremented by generated code as this would impact performance + assert!(!DEFAULT_COUNTERS.contains(&counter), "gen_counter_incr incremented {:?}", counter); + + if get_option!(gen_stats) { + asm_comment!(asm, "increment counter {}", counter.get_name()); + let ptr = get_counter_ptr(&counter.get_name()); + let ptr_reg = asm.load(Opnd::const_ptr(ptr as *const u8)); + let counter_opnd = Opnd::mem(64, ptr_reg, 0); + + // Increment and store the updated value + asm.incr_counter(counter_opnd, Opnd::UImm(1)); + } } // Save the incremented PC on the CFP @@ -230,7 +567,7 @@ fn jit_save_pc(jit: &JITState, asm: &mut Assembler) { pc.offset(cur_insn_len) }; - asm.comment("save PC to CFP"); + asm_comment!(asm, "save PC to CFP"); asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), Opnd::const_ptr(ptr as *const u8)); } @@ -238,43 +575,116 @@ fn jit_save_pc(jit: &JITState, asm: &mut Assembler) { /// This realigns the interpreter SP with the JIT SP /// Note: this will change the current value of REG_SP, /// which could invalidate memory operands -fn gen_save_sp(_jit: &JITState, asm: &mut Assembler, ctx: &mut Context) { - if ctx.get_sp_offset() != 0 { - asm.comment("save SP to CFP"); - let stack_pointer = ctx.sp_opnd(0); +fn gen_save_sp(asm: &mut Assembler) { + gen_save_sp_with_offset(asm, 0); +} + +/// Save the current SP + offset on the CFP +fn gen_save_sp_with_offset(asm: &mut Assembler, offset: i8) { + if asm.ctx.get_sp_offset() != -offset { + asm_comment!(asm, "save SP to CFP"); + let stack_pointer = asm.ctx.sp_opnd(offset as i32); let sp_addr = asm.lea(stack_pointer); asm.mov(SP, sp_addr); let cfp_sp_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP); asm.mov(cfp_sp_opnd, SP); - ctx.set_sp_offset(0); + asm.ctx.set_sp_offset(-offset); + } +} + +/// Basically jit_prepare_non_leaf_call(), but this registers the current PC +/// to lazily push a C method frame when it's necessary. +fn jit_prepare_lazy_frame_call( + jit: &mut JITState, + asm: &mut Assembler, + cme: *const rb_callable_method_entry_t, + recv_opnd: YARVOpnd, +) -> bool { + // We can use this only when the receiver is on stack. + let recv_idx = match recv_opnd { + StackOpnd(recv_idx) => recv_idx, + _ => unreachable!("recv_opnd must be on stack, but got: {:?}", recv_opnd), + }; + + // Get the next PC. jit_save_pc() saves that PC. + let pc: *mut VALUE = unsafe { + let cur_insn_len = insn_len(jit.get_opcode()) as isize; + jit.get_pc().offset(cur_insn_len) + }; + + let pc_to_cfunc = CodegenGlobals::get_pc_to_cfunc(); + match pc_to_cfunc.get(&pc) { + Some(&(other_cme, _)) if other_cme != cme => { + // Bail out if it's not the only cme on this callsite. + incr_counter!(lazy_frame_failure); + return false; + } + _ => { + // Let rb_yjit_lazy_push_frame() lazily push a C frame on this PC. + incr_counter!(lazy_frame_count); + pc_to_cfunc.insert(pc, (cme, recv_idx)); + } } + + // Save the PC to trigger a lazy frame push, and save the SP to get the receiver. + // The C func may call a method that doesn't raise, so prepare for invalidation too. + jit_prepare_non_leaf_call(jit, asm); + + // Make sure we're ready for calling rb_vm_push_cfunc_frame(). + let cfunc_argc = unsafe { get_mct_argc(get_cme_def_body_cfunc(cme)) }; + if cfunc_argc != -1 { + assert_eq!(recv_idx as i32, cfunc_argc); // verify the receiver index if possible + } + assert!(asm.get_leaf_ccall()); // It checks the stack canary we set for known_cfunc_codegen. 
+
+    true
 }
 
-/// jit_save_pc() + gen_save_sp(). Should be used before calling a routine that
-/// could:
+/// jit_save_pc() + gen_save_sp(). Should be used before calling a routine that could:
 /// - Perform GC allocation
 /// - Take the VM lock through RB_VM_LOCK_ENTER()
 /// - Perform Ruby method call
-fn jit_prepare_routine_call(
+///
+/// If the routine doesn't call arbitrary methods, use jit_prepare_call_with_gc() instead.
+fn jit_prepare_non_leaf_call(
     jit: &mut JITState,
-    ctx: &mut Context,
     asm: &mut Assembler
 ) {
-    jit.record_boundary_patch_point = true;
-    jit_save_pc(jit, asm);
-    gen_save_sp(jit, asm, ctx);
+    // Prepare for GC. Setting PC also prepares for showing a backtrace.
+    jit.record_boundary_patch_point = true; // VM lock could trigger invalidation
+    jit_save_pc(jit, asm); // for allocation tracing
+    gen_save_sp(asm); // protect objects from GC
 
     // In case the routine calls Ruby methods, it can set local variables
-    // through Kernel#binding and other means.
-    ctx.clear_local_types();
+    // through Kernel#binding, rb_debug_inspector API, and other means.
+    asm.clear_local_types();
+}
+
+/// jit_save_pc() + gen_save_sp(). Should be used before calling a routine that could:
+/// - Perform GC allocation
+/// - Take the VM lock through RB_VM_LOCK_ENTER()
+fn jit_prepare_call_with_gc(
+    jit: &mut JITState,
+    asm: &mut Assembler
+) {
+    jit.record_boundary_patch_point = true; // VM lock could trigger invalidation
+    jit_save_pc(jit, asm); // for allocation tracing
+    gen_save_sp(asm); // protect objects from GC
+
+    // Expect a leaf ccall(). You should use jit_prepare_non_leaf_call() if otherwise.
+    asm.expect_leaf_ccall();
 }
 
 /// Record the current codeblock write position for rewriting into a jump into
 /// the outlined block later. Used to implement global code invalidation.
 fn record_global_inval_patch(asm: &mut Assembler, outline_block_target_pos: CodePtr) {
+    // We add a padding before pos_marker so that the previous patch will not overlap this.
+    // jump_to_next_insn() puts a patch point at the end of the block in fallthrough cases.
+    // In the fallthrough case, the next block should start with the same Context, so the
+    // patch is fine, but it should not overlap another patch.
     asm.pad_inval_patch();
-    asm.pos_marker(move |code_ptr| {
-        CodegenGlobals::push_global_inval_patch(code_ptr, outline_block_target_pos);
+    asm.pos_marker(move |code_ptr, cb| {
+        CodegenGlobals::push_global_inval_patch(code_ptr, outline_block_target_pos, cb);
     });
 }
 
@@ -285,14 +695,36 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
         unsafe { CStr::from_ptr(rb_obj_info(val)).to_str().unwrap() }
     }
 
+    // Some types such as CString only assert the class field of the object
+    // when there has never been a singleton class created for objects of that class.
+    // Once there is a singleton class created they become their weaker
+    // `T*` variant, and more objects should pass the verification.
+ fn relax_type_with_singleton_class_assumption(ty: Type) -> Type { + if let Type::CString | Type::CArray | Type::CHash = ty { + if has_singleton_class_of(ty.known_class().unwrap()) { + match ty { + Type::CString => return Type::TString, + Type::CArray => return Type::TArray, + Type::CHash => return Type::THash, + _ => (), + } + } + } + + ty + } + // Only able to check types when at current insn - assert!(jit_at_current_insn(jit)); + assert!(jit.at_compile_target()); - let self_val = jit_peek_at_self(jit); + let self_val = jit.peek_at_self(); let self_val_type = Type::from(self_val); + let learned_self_type = ctx.get_opnd_type(SelfOpnd); + let learned_self_type = relax_type_with_singleton_class_assumption(learned_self_type); + // Verify self operand type - if self_val_type.diff(ctx.get_opnd_type(SelfOpnd)) == usize::MAX { + if self_val_type.diff(learned_self_type) == TypeDiff::Incompatible { panic!( "verify_ctx: ctx self type ({:?}) incompatible with actual value of self {}", ctx.get_opnd_type(SelfOpnd), @@ -301,10 +733,13 @@ fn verify_ctx(jit: &JITState, ctx: &Context) { } // Verify stack operand types - let top_idx = cmp::min(ctx.get_stack_size(), MAX_TEMP_TYPES as u16); + let top_idx = cmp::min(ctx.get_stack_size(), MAX_CTX_TEMPS as u8); for i in 0..top_idx { - let (learned_mapping, learned_type) = ctx.get_opnd_mapping(StackOpnd(i)); - let stack_val = jit_peek_at_stack(jit, ctx, i as isize); + let learned_mapping = ctx.get_opnd_mapping(StackOpnd(i)); + let learned_type = ctx.get_opnd_type(StackOpnd(i)); + let learned_type = relax_type_with_singleton_class_assumption(learned_type); + + let stack_val = jit.peek_at_stack(ctx, i as isize); let val_type = Type::from(stack_val); match learned_mapping { @@ -318,7 +753,7 @@ fn verify_ctx(jit: &JITState, ctx: &Context) { } } TempMapping::MapToLocal(local_idx) => { - let local_val = jit_peek_at_local(jit, local_idx.into()); + let local_val = jit.peek_at_local(local_idx.into()); if local_val != stack_val { panic!( "verify_ctx: stack value was mapped to local, but values did not match\n stack: {}\n local {}: {}", @@ -328,28 +763,30 @@ fn verify_ctx(jit: &JITState, ctx: &Context) { ); } } - TempMapping::MapToStack => {} + TempMapping::MapToStack(_) => {} } // If the actual type differs from the learned type - if val_type.diff(learned_type) == usize::MAX { + if val_type.diff(learned_type) == TypeDiff::Incompatible { panic!( - "verify_ctx: ctx type ({:?}) incompatible with actual value on stack: {}", + "verify_ctx: ctx type ({:?}) incompatible with actual value on stack: {} ({:?})", learned_type, - obj_info_str(stack_val) + obj_info_str(stack_val), + val_type, ); } } // Verify local variable types let local_table_size = unsafe { get_iseq_body_local_table_size(jit.iseq) }; - let top_idx: usize = cmp::min(local_table_size as usize, MAX_TEMP_TYPES); + let top_idx: usize = cmp::min(local_table_size as usize, MAX_CTX_TEMPS); for i in 0..top_idx { let learned_type = ctx.get_local_type(i); - let local_val = jit_peek_at_local(jit, i as i32); + let learned_type = relax_type_with_singleton_class_assumption(learned_type); + let local_val = jit.peek_at_local(i as i32); let local_type = Type::from(local_val); - if local_type.diff(learned_type) == usize::MAX { + if local_type.diff(learned_type) == TypeDiff::Incompatible { panic!( "verify_ctx: ctx type ({:?}) incompatible with actual value of local: {} (type {:?})", learned_type, @@ -364,14 +801,13 @@ fn verify_ctx(jit: &JITState, ctx: &Context) { // to the interpreter when it cannot service a stub by generating new 
code. // Before coming here, branch_stub_hit() takes care of fully reconstructing // interpreter state. -fn gen_code_for_exit_from_stub(ocb: &mut OutlinedCb) -> CodePtr { +fn gen_stub_exit(ocb: &mut OutlinedCb) -> Option<CodePtr> { let ocb = ocb.unwrap(); - let code_ptr = ocb.get_write_ptr(); - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); - gen_counter_incr!(asm, exit_from_branch_stub); + gen_counter_incr_without_pc(&mut asm, Counter::exit_from_branch_stub); - asm.comment("exit from branch stub"); + asm_comment!(asm, "exit from branch stub"); asm.cpop_into(SP); asm.cpop_into(EC); asm.cpop_into(CFP); @@ -380,23 +816,30 @@ fn gen_code_for_exit_from_stub(ocb: &mut OutlinedCb) -> CodePtr { asm.cret(Qundef.into()); - asm.compile(ocb); - - code_ptr + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) } /// Generate an exit to return to the interpreter -fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, asm: &mut Assembler) { - #[cfg(all(feature = "disasm", not(test)))] - { +fn gen_exit(exit_pc: *mut VALUE, asm: &mut Assembler) { + #[cfg(not(test))] + asm_comment!(asm, "exit to interpreter on {}", { let opcode = unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) }; - asm.comment(&format!("exit to interpreter on {}", insn_name(opcode as usize))); + insn_name(opcode as usize) + }); + + if asm.ctx.is_return_landing() { + asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); + let top = asm.stack_push(Type::Unknown); + asm.mov(top, C_RET_OPND); } + // Spill stack temps before returning to the interpreter + asm.spill_regs(); + // Generate the code to exit to the interpreters // Write the adjusted SP back into the CFP - if ctx.get_sp_offset() != 0 { - let sp_opnd = asm.lea(ctx.sp_opnd(0)); + if asm.ctx.get_sp_offset() != 0 { + let sp_opnd = asm.lea(asm.ctx.sp_opnd(0)); asm.mov( Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), sp_opnd @@ -416,9 +859,9 @@ fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, asm: &mut Assembler) { vec![Opnd::const_ptr(exit_pc as *const u8)] ); - // If --yjit-trace-exits option is enabled, record the exit stack - // while recording the side exits. - if get_option!(gen_trace_exits) { + // If --yjit-trace-exits is enabled, record the exit stack while recording + // the side exits. TraceExits::Counter is handled by gen_counted_exit(). + if get_option!(trace_exits) == Some(TraceExits::All) { asm.ccall( rb_yjit_record_exit_stack as *const u8, vec![Opnd::const_ptr(exit_pc as *const u8)] @@ -435,81 +878,108 @@ fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, asm: &mut Assembler) { asm.cret(Qundef.into()); } -/// Generate an exit to the interpreter in the outlined code block -fn gen_outlined_exit(exit_pc: *mut VALUE, ctx: &Context, ocb: &mut OutlinedCb) -> CodePtr { +/// :side-exit: +/// Get an exit for the current instruction in the outlined block. The code +/// for each instruction often begins with several guards before proceeding +/// to do work. When guards fail, an option we have is to exit to the +/// interpreter at an instruction boundary. The piece of code that takes +/// care of reconstructing interpreter state and exiting out of generated +/// code is called the side exit. +/// +/// No guards change the logic for reconstructing interpreter state at the +/// moment, so there is one unique side exit for each context. Note that +/// it's incorrect to jump to the side exit after any ctx stack push operations +/// since they change the logic required for reconstructing interpreter state. 
+///
+/// If you're in [the codegen module][self], use [JITState::gen_outlined_exit]
+/// instead of calling this directly.
+#[must_use]
+pub fn gen_outlined_exit(exit_pc: *mut VALUE, num_locals: u32, ctx: &Context, ocb: &mut OutlinedCb) -> Option<CodePtr> {
     let mut cb = ocb.unwrap();
-    let exit_code = cb.get_write_ptr();
-    let mut asm = Assembler::new();
+    let mut asm = Assembler::new(num_locals);
+    asm.ctx = *ctx;
+    asm.set_reg_mapping(ctx.get_reg_mapping());
+
+    gen_exit(exit_pc, &mut asm);
+
+    asm.compile(&mut cb, None).map(|(code_ptr, _)| code_ptr)
+}
+
+/// Get a side exit. Increment a counter in it if --yjit-stats is enabled.
+pub fn gen_counted_exit(exit_pc: *mut VALUE, side_exit: CodePtr, ocb: &mut OutlinedCb, counter: Option<Counter>) -> Option<CodePtr> {
+    // The counter is only incremented when stats are enabled
+    if !get_option!(gen_stats) {
+        return Some(side_exit);
+    }
+    let counter = match counter {
+        Some(counter) => counter,
+        None => return Some(side_exit),
+    };
 
-    gen_exit(exit_pc, ctx, &mut asm);
+    let mut asm = Assembler::new_without_iseq();
 
-    asm.compile(&mut cb);
+    // Increment a counter
+    gen_counter_incr_with_pc(&mut asm, counter, exit_pc);
 
-    exit_code
+    // Jump to the existing side exit
+    asm.jmp(Target::CodePtr(side_exit));
+
+    let ocb = ocb.unwrap();
+    asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
 }
 
-// :side-exit:
-// Get an exit for the current instruction in the outlined block. The code
-// for each instruction often begins with several guards before proceeding
-// to do work. When guards fail, an option we have is to exit to the
-// interpreter at an instruction boundary. The piece of code that takes
-// care of reconstructing interpreter state and exiting out of generated
-// code is called the side exit.
-//
-// No guards change the logic for reconstructing interpreter state at the
-// moment, so there is one unique side exit for each context. Note that
-// it's incorrect to jump to the side exit after any ctx stack push operations
-// since they change the logic required for reconstructing interpreter state.
-fn get_side_exit(jit: &mut JITState, ocb: &mut OutlinedCb, ctx: &Context) -> Target {
-    match jit.side_exit_for_pc {
-        None => {
-            let exit_code = gen_outlined_exit(jit.pc, ctx, ocb);
-            jit.side_exit_for_pc = Some(exit_code);
-            exit_code.as_side_exit()
-        }
-        Some(code_ptr) => code_ptr.as_side_exit()
+/// Preserve caller-saved stack temp registers during the call of a given block
+fn with_caller_saved_temp_regs<F, R>(asm: &mut Assembler, block: F) -> R where F: FnOnce(&mut Assembler) -> R {
+    for &reg in caller_saved_temp_regs() {
+        asm.cpush(Opnd::Reg(reg)); // save stack temps
+    }
+    let ret = block(asm);
+    for &reg in caller_saved_temp_regs().rev() {
+        asm.cpop_into(Opnd::Reg(reg)); // restore stack temps
     }
+    ret
 }
 
 // Ensure that there is an exit for the start of the block being compiled.
 // Block invalidation uses this exit.
-pub fn jit_ensure_block_entry_exit(jit: &mut JITState, ocb: &mut OutlinedCb) {
-    let blockref = jit.block.clone();
-    let mut block = blockref.borrow_mut();
-    let block_ctx = block.get_ctx();
-    let blockid = block.get_blockid();
-
-    if block.entry_exit.is_some() {
-        return;
+#[must_use]
+pub fn jit_ensure_block_entry_exit(jit: &mut JITState, asm: &mut Assembler) -> Option<()> {
+    if jit.block_entry_exit.is_some() {
+        return Some(());
     }
 
+    let block_starting_context = &jit.get_starting_ctx();
+
     // If we're compiling the first instruction in the block.
- if jit.insn_idx == blockid.idx { - // Generate the exit with the cache in jitstate. - block.entry_exit = Some(get_side_exit(jit, ocb, &block_ctx).unwrap_code_ptr()); + if jit.insn_idx == jit.starting_insn_idx { + // Generate the exit with the cache in Assembler. + let side_exit_context = SideExitContext::new(jit.pc, *block_starting_context); + let entry_exit = asm.get_side_exit(&side_exit_context, None, jit.get_ocb()); + jit.block_entry_exit = Some(entry_exit?); } else { - let _pc = unsafe { rb_iseq_pc_at_idx(blockid.iseq, blockid.idx) }; - block.entry_exit = Some(gen_outlined_exit(jit.pc, &block_ctx, ocb)); + let block_entry_pc = unsafe { rb_iseq_pc_at_idx(jit.iseq, jit.starting_insn_idx.into()) }; + jit.block_entry_exit = Some(jit.gen_outlined_exit(block_entry_pc, block_starting_context)?); } + + Some(()) } // Landing code for when c_return tracing is enabled. See full_cfunc_return(). -fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> CodePtr { +fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> Option<CodePtr> { let ocb = ocb.unwrap(); - let code_ptr = ocb.get_write_ptr(); - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); // This chunk of code expects REG_EC to be filled properly and // RAX to contain the return value of the C method. - asm.comment("full cfunc return"); + asm_comment!(asm, "full cfunc return"); asm.ccall( rb_full_cfunc_return as *const u8, vec![EC, C_RET_OPND] ); // Count the exit - gen_counter_incr!(asm, traced_cfunc_return); + gen_counter_incr_without_pc(&mut asm, Counter::traced_cfunc_return); // Return to the interpreter asm.cpop_into(SP); @@ -520,26 +990,23 @@ fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> CodePtr { asm.cret(Qundef.into()); - asm.compile(ocb); - - return code_ptr; + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) } /// Generate a continuation for leave that exits to the interpreter at REG_CFP->pc. /// This is used by gen_leave() and gen_entry_prologue() -fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr { +fn gen_leave_exit(ocb: &mut OutlinedCb) -> Option<CodePtr> { let ocb = ocb.unwrap(); - let code_ptr = ocb.get_write_ptr(); - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); // gen_leave() fully reconstructs interpreter state and leaves the // return value in C_RET_OPND before coming here. let ret_opnd = asm.live_reg_opnd(C_RET_OPND); // Every exit to the interpreter should be counted - gen_counter_incr!(asm, leave_interp_return); + gen_counter_incr_without_pc(&mut asm, Counter::leave_interp_return); - asm.comment("exit from leave"); + asm_comment!(asm, "exit from leave"); asm.cpop_into(SP); asm.cpop_into(EC); asm.cpop_into(CFP); @@ -548,52 +1015,86 @@ fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr { asm.cret(ret_opnd); - asm.compile(ocb); - - return code_ptr; + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) } -// Generate a runtime guard that ensures the PC is at the expected -// instruction index in the iseq, otherwise takes a side-exit. -// This is to handle the situation of optional parameters. -// When a function with optional parameters is called, the entry -// PC for the method isn't necessarily 0. -fn gen_pc_guard(asm: &mut Assembler, iseq: IseqPtr, insn_idx: u32) { - let pc_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC); - let expected_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) }; - let expected_pc_opnd = Opnd::const_ptr(expected_pc as *const u8); +// Increment SP and transfer the execution to the interpreter after jit_exec_exception(). 
+// On jit_exec_exception(), you need to return Qundef to keep executing caller non-FINISH +// frames on the interpreter. You also need to increment SP to push the return value to +// the caller's stack, which is different from gen_stub_exit(). +fn gen_leave_exception(ocb: &mut OutlinedCb) -> Option<CodePtr> { + let ocb = ocb.unwrap(); + let mut asm = Assembler::new_without_iseq(); - asm.cmp(pc_opnd, expected_pc_opnd); + // gen_leave() leaves the return value in C_RET_OPND before coming here. + let ruby_ret_val = asm.live_reg_opnd(C_RET_OPND); - let pc_match = asm.new_label("pc_match"); - asm.je(pc_match); + // Every exit to the interpreter should be counted + gen_counter_incr_without_pc(&mut asm, Counter::leave_interp_return); - // We're not starting at the first PC, so we need to exit. - gen_counter_incr!(asm, leave_start_pc_non_zero); + asm_comment!(asm, "push return value through cfp->sp"); + let cfp_sp = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP); + let sp = asm.load(cfp_sp); + asm.mov(Opnd::mem(64, sp, 0), ruby_ret_val); + let new_sp = asm.add(sp, SIZEOF_VALUE.into()); + asm.mov(cfp_sp, new_sp); + asm_comment!(asm, "exit from exception"); asm.cpop_into(SP); asm.cpop_into(EC); asm.cpop_into(CFP); asm.frame_teardown(); + // Execute vm_exec_core asm.cret(Qundef.into()); - // PC should match the expected insn_idx - asm.write_label(pc_match); + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) +} + +// Generate a runtime guard that ensures the PC is at the expected +// instruction index in the iseq, otherwise takes an entry stub +// that generates another check and entry. +// This is to handle the situation of optional parameters. +// When a function with optional parameters is called, the entry +// PC for the method isn't necessarily 0. +pub fn gen_entry_chain_guard( + asm: &mut Assembler, + ocb: &mut OutlinedCb, + blockid: BlockId, +) -> Option<PendingEntryRef> { + let entry = new_pending_entry(); + let stub_addr = gen_entry_stub(entry.uninit_entry.as_ptr() as usize, ocb)?; + + let pc_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC); + let expected_pc = unsafe { rb_iseq_pc_at_idx(blockid.iseq, blockid.idx.into()) }; + let expected_pc_opnd = Opnd::const_ptr(expected_pc as *const u8); + + asm_comment!(asm, "guard expected PC"); + asm.cmp(pc_opnd, expected_pc_opnd); + + asm.mark_entry_start(&entry); + asm.jne(stub_addr.into()); + asm.mark_entry_end(&entry); + return Some(entry); } /// Compile an interpreter entry block to be inserted into an iseq /// Returns None if compilation fails. -pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> Option<CodePtr> { +/// If jit_exception is true, compile JIT code for handling exceptions. +/// See jit_compile_exception() for details. 
+pub fn gen_entry_prologue( + cb: &mut CodeBlock, + ocb: &mut OutlinedCb, + blockid: BlockId, + stack_size: u8, + jit_exception: bool, +) -> Option<(CodePtr, RegMapping)> { + let iseq = blockid.iseq; let code_ptr = cb.get_write_ptr(); - let mut asm = Assembler::new(); - if get_option_ref!(dump_disasm).is_some() { - asm.comment(&format!("YJIT entry point: {}", iseq_get_location(iseq, 0))); - } else { - asm.comment("YJIT entry"); - } + let mut asm = Assembler::new(unsafe { get_iseq_body_local_table_size(iseq) }); + asm_comment!(asm, "YJIT entry point: {}", iseq_get_location(iseq, 0)); asm.frame_setup(); @@ -610,22 +1111,43 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); // Setup cfp->jit_return - asm.mov( - Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN), - Opnd::const_ptr(CodegenGlobals::get_leave_exit_code().raw_ptr()), - ); + // If this is an exception handler entry point + if jit_exception { + // On jit_exec_exception(), it's NOT safe to return a non-Qundef value + // from a non-FINISH frame. This function fixes that problem. + // See [jit_compile_exception] for details. + asm.ccall( + rb_yjit_set_exception_return as *mut u8, + vec![ + CFP, + Opnd::const_ptr(CodegenGlobals::get_leave_exit_code().raw_ptr(cb)), + Opnd::const_ptr(CodegenGlobals::get_leave_exception_code().raw_ptr(cb)), + ], + ); + } else { + // On jit_exec() or JIT_EXEC(), it's safe to return a non-Qundef value + // on the entry frame. See [jit_compile] for details. + asm.mov( + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN), + Opnd::const_ptr(CodegenGlobals::get_leave_exit_code().raw_ptr(cb)), + ); + } - // We're compiling iseqs that we *expect* to start at `insn_idx`. But in - // the case of optional parameters, the interpreter can set the pc to a - // different location depending on the optional parameters. If an iseq - // has optional parameters, we'll add a runtime check that the PC we've + // We're compiling iseqs that we *expect* to start at `insn_idx`. + // But in the case of optional parameters or when handling exceptions, + // the interpreter can set the pc to a different location. For + // such scenarios, we'll add a runtime check that the PC we've // compiled for is the same PC that the interpreter wants us to run with. - // If they don't match, then we'll take a side exit. - if unsafe { get_iseq_flags_has_opt(iseq) } { - gen_pc_guard(&mut asm, iseq, insn_idx); - } + // If they don't match, then we'll jump to an entry stub and generate + // another PC check and entry there. + let pending_entry = if unsafe { get_iseq_flags_has_opt(iseq) } || jit_exception { + Some(gen_entry_chain_guard(&mut asm, ocb, blockid)?) + } else { + None + }; + let reg_mapping = gen_entry_reg_mapping(&mut asm, blockid, stack_size); - asm.compile(cb); + asm.compile(cb, Some(ocb))?; if cb.has_dropped_bytes() { None @@ -635,53 +1157,98 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O for page in cb.addrs_to_pages(code_ptr, cb.get_write_ptr()) { iseq_payload.pages.insert(page); } - Some(code_ptr) + // Write an entry to the heap and push it to the ISEQ + if let Some(pending_entry) = pending_entry { + let pending_entry = Rc::try_unwrap(pending_entry) + .ok().expect("PendingEntry should be unique"); + iseq_payload.entries.push(pending_entry.into_entry()); + } + Some((code_ptr, reg_mapping)) + } +} + +/// Generate code to load registers for a JIT entry. 
When the entry block is compiled for
+/// the first time, it loads no register. When it has already been compiled as a callee
+/// block, it loads some registers to reuse the block.
+pub fn gen_entry_reg_mapping(asm: &mut Assembler, blockid: BlockId, stack_size: u8) -> RegMapping {
+    // Find an existing callee block. If it's not found or uses no register, skip loading registers.
+    let mut ctx = Context::default();
+    ctx.set_stack_size(stack_size);
+    let reg_mapping = find_most_compatible_reg_mapping(blockid, &ctx).unwrap_or(RegMapping::default());
+    if reg_mapping == RegMapping::default() {
+        return reg_mapping;
+    }
+
+    // If found, load the same registers to reuse the block.
+    asm_comment!(asm, "reuse maps: {:?}", reg_mapping);
+    let local_table_size: u32 = unsafe { get_iseq_body_local_table_size(blockid.iseq) }.try_into().unwrap();
+    for &reg_opnd in reg_mapping.get_reg_opnds().iter() {
+        match reg_opnd {
+            RegOpnd::Local(local_idx) => {
+                let loaded_reg = TEMP_REGS[reg_mapping.get_reg(reg_opnd).unwrap()];
+                let loaded_temp = asm.local_opnd(local_table_size - local_idx as u32 + VM_ENV_DATA_SIZE - 1);
+                asm.load_into(Opnd::Reg(loaded_reg), loaded_temp);
+            }
+            RegOpnd::Stack(_) => unreachable!("find_most_compatible_reg_mapping should not leave {:?}", reg_opnd),
+        }
     }
+
+    reg_mapping
 }
 
 // Generate code to check for interrupts and take a side-exit.
 // Warning: this function clobbers REG0
-fn gen_check_ints(asm: &mut Assembler, side_exit: Target) {
+fn gen_check_ints(
+    asm: &mut Assembler,
+    counter: Counter,
+) {
     // Check for interrupts
     // see RUBY_VM_CHECK_INTS(ec) macro
-    asm.comment("RUBY_VM_CHECK_INTS(ec)");
+    asm_comment!(asm, "RUBY_VM_CHECK_INTS(ec)");
 
     // Not checking interrupt_mask since it's zero outside finalize_deferred_heap_pages,
     // signal_exec, or rb_postponed_job_flush.
-    let interrupt_flag = asm.load(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG));
+    let interrupt_flag = asm.load(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG as i32));
     asm.test(interrupt_flag, interrupt_flag);
-    asm.jnz(side_exit);
+    asm.jnz(Target::side_exit(counter));
 }
 
 // Generate a stubbed unconditional jump to the next bytecode instruction.
 // Blocks that are part of a guard chain can use this to share the same successor.
 fn jump_to_next_insn(
     jit: &mut JITState,
-    current_context: &Context,
     asm: &mut Assembler,
-    ocb: &mut OutlinedCb,
-) {
-    // Reset the depth since in current usages we only ever jump to to
+) -> Option<CodegenStatus> {
+    end_block_with_jump(jit, asm, jit.next_insn_idx())
+}
+
+fn end_block_with_jump(
+    jit: &mut JITState,
+    asm: &mut Assembler,
+    continuation_insn_idx: u16,
+) -> Option<CodegenStatus> {
+    // Reset the depth since in current usages we only ever jump to
     // chain_depth > 0 from the same instruction.
-    let mut reset_depth = current_context.clone();
-    reset_depth.reset_chain_depth();
+    let mut reset_depth = asm.ctx;
+    reset_depth.reset_chain_depth_and_defer();
 
     let jump_block = BlockId {
         iseq: jit.iseq,
-        idx: jit_next_insn_idx(jit),
+        idx: continuation_insn_idx,
     };
 
     // We are at the end of the current instruction. Record the boundary.
if jit.record_boundary_patch_point { - let exit_pc = unsafe { jit.pc.offset(insn_len(jit.opcode).try_into().unwrap()) }; - let exit_pos = gen_outlined_exit(exit_pc, &reset_depth, ocb); - record_global_inval_patch(asm, exit_pos); jit.record_boundary_patch_point = false; + let exit_pc = unsafe { rb_iseq_pc_at_idx(jit.iseq, continuation_insn_idx.into())}; + let exit_pos = jit.gen_outlined_exit(exit_pc, &reset_depth); + record_global_inval_patch(asm, exit_pos?); } // Generate the jump instruction gen_direct_jump(jit, &reset_depth, jump_block, asm); + Some(EndBlock) } // Compile a sequence of bytecode instructions for a given basic block version. @@ -694,44 +1261,66 @@ pub fn gen_single_block( ec: EcPtr, cb: &mut CodeBlock, ocb: &mut OutlinedCb, + first_block: bool, ) -> Result<BlockRef, ()> { // Limit the number of specialized versions for this block - let mut ctx = limit_block_versions(blockid, start_ctx); + let ctx = limit_block_versions(blockid, start_ctx); verify_blockid(blockid); assert!(!(blockid.idx == 0 && ctx.get_stack_size() > 0)); + // Save machine code placement of the block. `cb` might page switch when we + // generate code in `ocb`. + let block_start_addr = cb.get_write_ptr(); + // Instruction sequence to compile let iseq = blockid.iseq; let iseq_size = unsafe { get_iseq_encoded_size(iseq) }; - let mut insn_idx: c_uint = blockid.idx; - let starting_insn_idx = insn_idx; - - // Allocate the new block - let blockref = Block::new(blockid, &ctx); + let iseq_size: IseqIdx = if let Ok(size) = iseq_size.try_into() { + size + } else { + // ISeq too large to compile + return Err(()); + }; + let mut insn_idx: IseqIdx = blockid.idx; // Initialize a JIT state object - let mut jit = JITState::new(&blockref); + let mut jit = JITState::new(blockid, ctx, cb.get_write_ptr(), ec, ocb, first_block); jit.iseq = blockid.iseq; - jit.ec = Some(ec); - - // Mark the start position of the block - blockref.borrow_mut().set_start_addr(cb.get_write_ptr()); // Create a backend assembler instance - let mut asm = Assembler::new(); + let mut asm = Assembler::new(jit.num_locals()); + asm.ctx = ctx; - #[cfg(feature = "disasm")] if get_option_ref!(dump_disasm).is_some() { let blockid_idx = blockid.idx; - asm.comment(&format!("Block: {} (ISEQ offset: {})", iseq_get_location(blockid.iseq, blockid_idx), blockid_idx)); + let chain_depth = if asm.ctx.get_chain_depth() > 0 { format!("(chain_depth: {})", asm.ctx.get_chain_depth()) } else { "".to_string() }; + asm_comment!(asm, "Block: {} {}", iseq_get_location(blockid.iseq, blockid_idx), chain_depth); + asm_comment!(asm, "reg_mapping: {:?}", asm.ctx.get_reg_mapping()); + } + + Log::add_block_with_chain_depth(blockid, asm.ctx.get_chain_depth()); + + // Mark the start of an ISEQ for --yjit-perf + jit_perf_symbol_push!(jit, &mut asm, &get_iseq_name(iseq), PerfMap::ISEQ); + + if asm.ctx.is_return_landing() { + // Continuation of the end of gen_leave(). + // Reload REG_SP for the current frame and transfer the return value + // to the stack top. + asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); + + let top = asm.stack_push(Type::Unknown); + asm.mov(top, C_RET_OPND); + + asm.ctx.clear_return_landing(); } // For each instruction to compile // NOTE: could rewrite this loop with a std::iter::Iterator while insn_idx < iseq_size { // Get the current pc and opcode - let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) }; + let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx.into()) }; // try_into() call below is unfortunate. Maybe pick i32 instead of usize for opcodes. 
let opcode: usize = unsafe { rb_iseq_opcode_at_pc(iseq, pc) } .try_into() @@ -740,8 +1329,8 @@ pub fn gen_single_block( // We need opt_getconstant_path to be in a block all on its own. Cut the block short // if we run into it. This is necessary because we want to invalidate based on the // instruction's index. - if opcode == YARVINSN_opt_getconstant_path.as_usize() && insn_idx > starting_insn_idx { - jump_to_next_insn(&mut jit, &ctx, &mut asm, ocb); + if opcode == YARVINSN_opt_getconstant_path.as_usize() && insn_idx > jit.starting_insn_idx { + jump_to_next_insn(&mut jit, &mut asm); break; } @@ -749,31 +1338,38 @@ pub fn gen_single_block( jit.insn_idx = insn_idx; jit.opcode = opcode; jit.pc = pc; - jit.side_exit_for_pc = None; + jit.stack_size_for_pc = asm.ctx.get_stack_size(); + asm.set_side_exit_context(pc, asm.ctx.get_stack_size()); + + // stack_pop doesn't immediately deallocate a register for stack temps, + // but it's safe to do so at this instruction boundary. + for stack_idx in asm.ctx.get_stack_size()..MAX_CTX_TEMPS as u8 { + asm.ctx.dealloc_reg(RegOpnd::Stack(stack_idx)); + } // If previous instruction requested to record the boundary if jit.record_boundary_patch_point { // Generate an exit to this instruction and record it - let exit_pos = gen_outlined_exit(jit.pc, &ctx, ocb); + let exit_pos = jit.gen_outlined_exit(jit.pc, &asm.ctx).ok_or(())?; record_global_inval_patch(&mut asm, exit_pos); jit.record_boundary_patch_point = false; } // In debug mode, verify our existing assumption - if cfg!(debug_assertions) && get_option!(verify_ctx) && jit_at_current_insn(&jit) { - verify_ctx(&jit, &ctx); + if cfg!(debug_assertions) && get_option!(verify_ctx) && jit.at_compile_target() { + verify_ctx(&jit, &asm.ctx); } + // :count-placement: + // Count bytecode instructions that execute in generated code. + // Note that the increment happens even when the output takes side exit. + gen_counter_incr(&jit, &mut asm, Counter::yjit_insns_count); + // Lookup the codegen function for this instruction - let mut status = CantCompile; + let mut status = None; if let Some(gen_fn) = get_gen_fn(VALUE(opcode)) { - // :count-placement: - // Count bytecode instructions that execute in generated code. - // Note that the increment happens even when the output takes side exit. - gen_counter_incr!(asm, exec_instruction); - // Add a comment for the name of the YARV instruction - asm.comment(&format!("Insn: {}", insn_name(opcode))); + asm_comment!(asm, "Insn: {:04} {} (stack_size: {})", insn_idx, insn_name(opcode), asm.ctx.get_stack_size()); // If requested, dump instructions for debugging if get_option!(dump_insns) { @@ -782,27 +1378,30 @@ pub fn gen_single_block( } // Call the code generation function - status = gen_fn(&mut jit, &mut ctx, &mut asm, ocb); + jit_perf_symbol_push!(jit, &mut asm, &insn_name(opcode), PerfMap::Codegen); + status = gen_fn(&mut jit, &mut asm); + jit_perf_symbol_pop!(jit, &mut asm, PerfMap::Codegen); + + #[cfg(debug_assertions)] + assert!(!asm.get_leaf_ccall(), "ccall() wasn't used after leaf_ccall was set in {}", insn_name(opcode)); } // If we can't compile this instruction // exit to the interpreter and stop compiling - if status == CantCompile { + if status == None { if get_option!(dump_insns) { println!("can't compile {}", insn_name(opcode)); } - let mut block = jit.block.borrow_mut(); - - // TODO: if the codegen function makes changes to ctx and then return YJIT_CANT_COMPILE, - // the exit this generates would be wrong. 
We could save a copy of the entry context - // and assert that ctx is the same here. - gen_exit(jit.pc, &ctx, &mut asm); + // Rewind stack_size using ctx.with_stack_size to allow stack_size changes + // before you return None. + asm.ctx = asm.ctx.with_stack_size(jit.stack_size_for_pc); + gen_exit(jit.pc, &mut asm); - // If this is the first instruction in the block, then we can use - // the exit for block->entry_exit. - if insn_idx == block.get_blockid().idx { - block.entry_exit = block.get_start_addr(); + // If this is the first instruction in the block, then + // the entry address is the address for block_entry_exit + if insn_idx == jit.starting_insn_idx { + jit.block_entry_exit = Some(jit.output_ptr); } break; @@ -810,347 +1409,384 @@ pub fn gen_single_block( // For now, reset the chain depth after each instruction as only the // first instruction in the block can concern itself with the depth. - ctx.reset_chain_depth(); + asm.ctx.reset_chain_depth_and_defer(); // Move to the next instruction to compile - insn_idx += insn_len(opcode); + insn_idx += insn_len(opcode) as u16; // If the instruction terminates this block - if status == EndBlock { + if status == Some(EndBlock) { break; } } + let end_insn_idx = insn_idx; - // Finish filling out the block - { - let mut block = jit.block.borrow_mut(); - if block.entry_exit.is_some() { - asm.pad_inval_patch(); - } + // We currently can't handle cases where the request is for a block that + // doesn't go to the next instruction in the same iseq. + assert!(!jit.record_boundary_patch_point); - // Compile code into the code block - let gc_offsets = asm.compile(cb); + // Bail when requested to. + if jit.block_abandoned { + incr_counter!(abandoned_block_count); + return Err(()); + } - // Add the GC offsets to the block - block.add_gc_obj_offsets(gc_offsets); + // Pad the block if it has the potential to be invalidated + if jit.block_entry_exit.is_some() { + asm.pad_inval_patch(); + } - // Mark the end position of the block - block.set_end_addr(cb.get_write_ptr()); + // Mark the end of an ISEQ for --yjit-perf + jit_perf_symbol_pop!(jit, &mut asm, PerfMap::ISEQ); - // Store the index of the last instruction in the block - block.set_end_idx(insn_idx); - } + // Compile code into the code block + let (_, gc_offsets) = asm.compile(cb, Some(jit.get_ocb())).ok_or(())?; + let end_addr = cb.get_write_ptr(); - // We currently can't handle cases where the request is for a block that - // doesn't go to the next instruction. 
- assert!(!jit.record_boundary_patch_point); + // Flush perf symbols after asm.compile() writes addresses + if get_option!(perf_map).is_some() { + jit.flush_perf_symbols(cb); + } // If code for the block doesn't fit, fail - if cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes() { - free_block(&blockref); + if cb.has_dropped_bytes() || jit.get_ocb().unwrap().has_dropped_bytes() { return Err(()); } // Block compiled successfully - Ok(blockref) + Ok(jit.into_block(end_insn_idx, block_start_addr, end_addr, gc_offsets)) } fn gen_nop( _jit: &mut JITState, - _ctx: &mut Context, _asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Do nothing - KeepCompiling + Some(KeepCompiling) } fn gen_pop( _jit: &mut JITState, - ctx: &mut Context, - _asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { + asm: &mut Assembler, +) -> Option<CodegenStatus> { // Decrement SP - ctx.stack_pop(1); - KeepCompiling + asm.stack_pop(1); + Some(KeepCompiling) } fn gen_dup( _jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - - let dup_val = ctx.stack_pop(0); - let (mapping, tmp_type) = ctx.get_opnd_mapping(StackOpnd(0)); +) -> Option<CodegenStatus> { + let dup_val = asm.stack_opnd(0); + let mapping = asm.ctx.get_opnd_mapping(dup_val.into()); - let loc0 = ctx.stack_push_mapping((mapping, tmp_type)); + let loc0 = asm.stack_push_mapping(mapping); asm.mov(loc0, dup_val); - KeepCompiling + Some(KeepCompiling) } // duplicate stack top n elements fn gen_dupn( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let n = jit_get_arg(jit, 0).as_usize(); +) -> Option<CodegenStatus> { + let n = jit.get_arg(0).as_usize(); // In practice, seems to be only used for n==2 if n != 2 { - return CantCompile; + return None; } - let opnd1: Opnd = ctx.stack_opnd(1); - let opnd0: Opnd = ctx.stack_opnd(0); + let opnd1: Opnd = asm.stack_opnd(1); + let opnd0: Opnd = asm.stack_opnd(0); - let mapping1 = ctx.get_opnd_mapping(StackOpnd(1)); - let mapping0 = ctx.get_opnd_mapping(StackOpnd(0)); + let mapping1 = asm.ctx.get_opnd_mapping(opnd1.into()); + let mapping0 = asm.ctx.get_opnd_mapping(opnd0.into()); - let dst1: Opnd = ctx.stack_push_mapping(mapping1); + let dst1: Opnd = asm.stack_push_mapping(mapping1); asm.mov(dst1, opnd1); - let dst0: Opnd = ctx.stack_push_mapping(mapping0); + let dst0: Opnd = asm.stack_push_mapping(mapping0); asm.mov(dst0, opnd0); - KeepCompiling + Some(KeepCompiling) +} + +// Reverse top X stack entries +fn gen_opt_reverse( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + let count = jit.get_arg(0).as_i32(); + for n in 0..(count/2) { + stack_swap(asm, n, count - 1 - n); + } + Some(KeepCompiling) } // Swap top 2 stack entries fn gen_swap( - jit: &mut JITState, - ctx: &mut Context, + _jit: &mut JITState, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - stack_swap(jit, ctx, asm, 0, 1); - KeepCompiling +) -> Option<CodegenStatus> { + stack_swap(asm, 0, 1); + Some(KeepCompiling) } fn stack_swap( - _jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - offset0: u16, - offset1: u16, + offset0: i32, + offset1: i32, ) { - let stack0_mem = ctx.stack_opnd(offset0 as i32); - let stack1_mem = ctx.stack_opnd(offset1 as i32); + let stack0_mem = asm.stack_opnd(offset0); + let stack1_mem = asm.stack_opnd(offset1); - let mapping0 = ctx.get_opnd_mapping(StackOpnd(offset0)); - let mapping1 = 
ctx.get_opnd_mapping(StackOpnd(offset1)); + let mapping0 = asm.ctx.get_opnd_mapping(stack0_mem.into()); + let mapping1 = asm.ctx.get_opnd_mapping(stack1_mem.into()); let stack0_reg = asm.load(stack0_mem); let stack1_reg = asm.load(stack1_mem); asm.mov(stack0_mem, stack1_reg); asm.mov(stack1_mem, stack0_reg); - ctx.set_opnd_mapping(StackOpnd(offset0), mapping1); - ctx.set_opnd_mapping(StackOpnd(offset1), mapping0); + asm.ctx.set_opnd_mapping(stack0_mem.into(), mapping1); + asm.ctx.set_opnd_mapping(stack1_mem.into(), mapping0); } fn gen_putnil( - jit: &mut JITState, - ctx: &mut Context, + _jit: &mut JITState, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - jit_putobject(jit, ctx, asm, Qnil); - KeepCompiling +) -> Option<CodegenStatus> { + jit_putobject(asm, Qnil); + Some(KeepCompiling) } -fn jit_putobject(_jit: &mut JITState, ctx: &mut Context, asm: &mut Assembler, arg: VALUE) { +fn jit_putobject(asm: &mut Assembler, arg: VALUE) { let val_type: Type = Type::from(arg); - let stack_top = ctx.stack_push(val_type); + let stack_top = asm.stack_push(val_type); asm.mov(stack_top, arg.into()); } fn gen_putobject_int2fix( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { let opcode = jit.opcode; let cst_val: usize = if opcode == YARVINSN_putobject_INT2FIX_0_.as_usize() { 0 } else { 1 }; + let cst_val = VALUE::fixnum_from_usize(cst_val); + + if let Some(result) = fuse_putobject_opt_ltlt(jit, asm, cst_val) { + return Some(result); + } - jit_putobject(jit, ctx, asm, VALUE::fixnum_from_usize(cst_val)); - KeepCompiling + jit_putobject(asm, cst_val); + Some(KeepCompiling) } fn gen_putobject( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let arg: VALUE = jit_get_arg(jit, 0); +) -> Option<CodegenStatus> { + let arg: VALUE = jit.get_arg(0); + + if let Some(result) = fuse_putobject_opt_ltlt(jit, asm, arg) { + return Some(result); + } - jit_putobject(jit, ctx, asm, arg); - KeepCompiling + jit_putobject(asm, arg); + Some(KeepCompiling) +} + +/// Combine `putobject` and `opt_ltlt` together if profitable, for example when +/// left shifting an integer by a constant amount. 
+fn fuse_putobject_opt_ltlt( + jit: &mut JITState, + asm: &mut Assembler, + constant_object: VALUE, +) -> Option<CodegenStatus> { + let next_opcode = unsafe { rb_vm_insn_addr2opcode(jit.pc.add(insn_len(jit.opcode).as_usize()).read().as_ptr()) }; + if next_opcode == YARVINSN_opt_ltlt as i32 && constant_object.fixnum_p() { + // Untag the fixnum shift amount + let shift_amt = constant_object.as_isize() >> 1; + if shift_amt > 63 || shift_amt < 0 { + return None; + } + if !jit.at_compile_target() { + return jit.defer_compilation(asm); + } + + let lhs = jit.peek_at_stack(&asm.ctx, 0); + if !lhs.fixnum_p() { + return None; + } + + if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, BOP_LTLT) { + return None; + } + + asm_comment!(asm, "integer left shift with rhs={shift_amt}"); + let lhs = asm.stack_opnd(0); + + // Guard that lhs is a fixnum if necessary + let lhs_type = asm.ctx.get_opnd_type(lhs.into()); + if lhs_type != Type::Fixnum { + asm_comment!(asm, "guard arg0 fixnum"); + asm.test(lhs, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + + jit_chain_guard( + JCC_JZ, + jit, + asm, + SEND_MAX_DEPTH, + Counter::guard_send_not_fixnums, + ); + } + + asm.stack_pop(1); + fixnum_left_shift_body(asm, lhs, shift_amt as u64); + return end_block_with_jump(jit, asm, jit.next_next_insn_idx()); + } + return None; } fn gen_putself( _jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Write it on the stack - let stack_top = ctx.stack_push_self(); + let stack_top = asm.stack_push_self(); asm.mov( stack_top, Opnd::mem(VALUE_BITS, CFP, RUBY_OFFSET_CFP_SELF) ); - KeepCompiling + Some(KeepCompiling) } fn gen_putspecialobject( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let object_type = jit_get_arg(jit, 0).as_usize(); +) -> Option<CodegenStatus> { + let object_type = jit.get_arg(0).as_usize(); if object_type == VM_SPECIAL_OBJECT_VMCORE.as_usize() { - let stack_top = ctx.stack_push(Type::UnknownHeap); + let stack_top = asm.stack_push(Type::UnknownHeap); let frozen_core = unsafe { rb_mRubyVMFrozenCore }; asm.mov(stack_top, frozen_core.into()); - KeepCompiling + Some(KeepCompiling) } else { // TODO: implement for VM_SPECIAL_OBJECT_CBASE and // VM_SPECIAL_OBJECT_CONST_BASE - CantCompile + None } } // set Nth stack entry to stack top fn gen_setn( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let n = jit_get_arg(jit, 0).as_usize(); +) -> Option<CodegenStatus> { + let n = jit.get_arg(0).as_usize(); - let top_val = ctx.stack_pop(0); - let dst_opnd = ctx.stack_opnd(n.try_into().unwrap()); + let top_val = asm.stack_opnd(0); + let dst_opnd = asm.stack_opnd(n.try_into().unwrap()); asm.mov( dst_opnd, top_val ); - let mapping = ctx.get_opnd_mapping(StackOpnd(0)); - ctx.set_opnd_mapping(StackOpnd(n.try_into().unwrap()), mapping); + let mapping = asm.ctx.get_opnd_mapping(top_val.into()); + asm.ctx.set_opnd_mapping(dst_opnd.into(), mapping); - KeepCompiling + Some(KeepCompiling) } // get nth stack value, then push it fn gen_topn( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let n = jit_get_arg(jit, 0).as_usize(); +) -> Option<CodegenStatus> { + let n = jit.get_arg(0).as_usize(); - let top_n_val = ctx.stack_opnd(n.try_into().unwrap()); - let mapping = ctx.get_opnd_mapping(StackOpnd(n.try_into().unwrap())); - let loc0 = 
ctx.stack_push_mapping(mapping); + let top_n_val = asm.stack_opnd(n.try_into().unwrap()); + let mapping = asm.ctx.get_opnd_mapping(top_n_val.into()); + let loc0 = asm.stack_push_mapping(mapping); asm.mov(loc0, top_n_val); - KeepCompiling + Some(KeepCompiling) } // Pop n values off the stack fn gen_adjuststack( jit: &mut JITState, - ctx: &mut Context, - _cb: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let n = jit_get_arg(jit, 0).as_usize(); - ctx.stack_pop(n); - KeepCompiling + asm: &mut Assembler, +) -> Option<CodegenStatus> { + let n = jit.get_arg(0).as_usize(); + asm.stack_pop(n); + Some(KeepCompiling) } fn gen_opt_plus( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, asm, ocb); - return EndBlock; - } - - let comptime_a = jit_peek_at_stack(jit, ctx, 1); - let comptime_b = jit_peek_at_stack(jit, ctx, 0); - - if comptime_a.fixnum_p() && comptime_b.fixnum_p() { - // Create a side-exit to fall back to the interpreter - // Note: we generate the side-exit before popping operands from the stack - let side_exit = get_side_exit(jit, ocb, ctx); +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + return jit.defer_compilation(asm); + } + }; - if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS) { - return CantCompile; + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS) { + return None; } // Check that both operands are fixnums - guard_two_fixnums(jit, ctx, asm, ocb, side_exit); + guard_two_fixnums(jit, asm); // Get the operands from the stack - let arg1 = ctx.stack_pop(1); - let arg0 = ctx.stack_pop(1); + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); // Add arg0 + arg1 and test for overflow let arg0_untag = asm.sub(arg0, Opnd::Imm(1)); let out_val = asm.add(arg0_untag, arg1); - asm.jo(side_exit); + asm.jo(Target::side_exit(Counter::opt_plus_overflow)); // Push the output on the stack - let dst = ctx.stack_push(Type::Fixnum); + let dst = asm.stack_push(Type::Fixnum); asm.mov(dst, out_val); - KeepCompiling + Some(KeepCompiling) } else { - gen_opt_send_without_block(jit, ctx, asm, ocb) + gen_opt_send_without_block(jit, asm) } } // new array initialized from top N values fn gen_newarray( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let n = jit_get_arg(jit, 0).as_u32(); +) -> Option<CodegenStatus> { + let n = jit.get_arg(0).as_u32(); // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_call_with_gc(jit, asm); // If n is 0, then elts is never going to be read, so we can just pass null let values_ptr = if n == 0 { Opnd::UImm(0) } else { - asm.comment("load pointer to array elts"); - let offset_magnitude = (SIZEOF_VALUE as u32) * n; - let values_opnd = ctx.sp_opnd(-(offset_magnitude as isize)); + asm_comment!(asm, "load pointer to array elements"); + let values_opnd = asm.ctx.sp_opnd(-(n as i32)); asm.lea(values_opnd) }; @@ -1164,24 +1800,22 @@ fn gen_newarray( ] ); - ctx.stack_pop(n.as_usize()); - let stack_ret = ctx.stack_push(Type::CArray); + asm.stack_pop(n.as_usize()); + let stack_ret = asm.stack_push(Type::CArray); asm.mov(stack_ret, new_ary); - KeepCompiling + Some(KeepCompiling) } // dup array fn gen_duparray( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - 
_ocb: &mut OutlinedCb, -) -> CodegenStatus { - let ary = jit_get_arg(jit, 0); +) -> Option<CodegenStatus> { + let ary = jit.get_arg(0); // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_call_with_gc(jit, asm); // call rb_ary_resurrect(VALUE ary); let new_ary = asm.ccall( @@ -1189,141 +1823,373 @@ fn gen_duparray( vec![ary.into()], ); - let stack_ret = ctx.stack_push(Type::CArray); + let stack_ret = asm.stack_push(Type::CArray); asm.mov(stack_ret, new_ary); - KeepCompiling + Some(KeepCompiling) } // dup hash fn gen_duphash( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let hash = jit_get_arg(jit, 0); +) -> Option<CodegenStatus> { + let hash = jit.get_arg(0); // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_call_with_gc(jit, asm); // call rb_hash_resurrect(VALUE hash); let hash = asm.ccall(rb_hash_resurrect as *const u8, vec![hash.into()]); - let stack_ret = ctx.stack_push(Type::Hash); + let stack_ret = asm.stack_push(Type::CHash); asm.mov(stack_ret, hash); - KeepCompiling + Some(KeepCompiling) } // call to_a on the array on the stack fn gen_splatarray( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let flag = jit_get_arg(jit, 0).as_usize(); +) -> Option<CodegenStatus> { + let flag = jit.get_arg(0).as_usize(); - // Save the PC and SP because the callee may allocate + // Save the PC and SP because the callee may call #to_a // Note that this modifies REG_SP, which is why we do it first - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_non_leaf_call(jit, asm); // Get the operands from the stack - let ary_opnd = ctx.stack_pop(1); + let ary_opnd = asm.stack_opnd(0); // Call rb_vm_splat_array(flag, ary) let ary = asm.ccall(rb_vm_splat_array as *const u8, vec![flag.into(), ary_opnd]); + asm.stack_pop(1); // Keep it on stack during ccall for GC - let stack_ret = ctx.stack_push(Type::TArray); + let stack_ret = asm.stack_push(Type::TArray); asm.mov(stack_ret, ary); - KeepCompiling + Some(KeepCompiling) +} + +// call to_hash on hash to keyword splat before converting block +// e.g. foo(**object, &block) +fn gen_splatkw( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + // Defer compilation so we can specialize on a runtime hash operand + if !jit.at_compile_target() { + return jit.defer_compilation(asm); + } + + let comptime_hash = jit.peek_at_stack(&asm.ctx, 1); + if comptime_hash.hash_p() { + // If a compile-time hash operand is T_HASH, just guard that it's T_HASH. + let hash_opnd = asm.stack_opnd(1); + guard_object_is_hash(asm, hash_opnd, hash_opnd.into(), Counter::splatkw_not_hash); + } else if comptime_hash.nil_p() { + // Speculate we'll see nil if compile-time hash operand is nil + let hash_opnd = asm.stack_opnd(1); + let hash_opnd_type = asm.ctx.get_opnd_type(hash_opnd.into()); + + if hash_opnd_type != Type::Nil { + asm.cmp(hash_opnd, Qnil.into()); + asm.jne(Target::side_exit(Counter::splatkw_not_nil)); + + if Type::Nil.diff(hash_opnd_type) != TypeDiff::Incompatible { + asm.ctx.upgrade_opnd_type(hash_opnd.into(), Type::Nil); + } + } + } else { + // Otherwise, call #to_hash on the operand if it's not nil. 
+ + // Save the PC and SP because the callee may call #to_hash + jit_prepare_non_leaf_call(jit, asm); + + // Get the operands from the stack + let block_opnd = asm.stack_opnd(0); + let block_type = asm.ctx.get_opnd_type(block_opnd.into()); + let hash_opnd = asm.stack_opnd(1); + + c_callable! { + fn to_hash_if_not_nil(mut obj: VALUE) -> VALUE { + if obj != Qnil { + obj = unsafe { rb_to_hash_type(obj) }; + } + obj + } + } + + let hash = asm.ccall(to_hash_if_not_nil as _, vec![hash_opnd]); + asm.stack_pop(2); // Keep it on stack during ccall for GC + + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, hash); + asm.stack_push(block_type); + // Leave block_opnd spilled by ccall as is + asm.ctx.dealloc_reg(RegOpnd::Stack(asm.ctx.get_stack_size() - 1)); + } + + Some(KeepCompiling) } // concat two arrays fn gen_concatarray( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - // Save the PC and SP because the callee may allocate +) -> Option<CodegenStatus> { + // Save the PC and SP because the callee may call #to_a // Note that this modifies REG_SP, which is why we do it first - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_non_leaf_call(jit, asm); // Get the operands from the stack - let ary2st_opnd = ctx.stack_pop(1); - let ary1_opnd = ctx.stack_pop(1); + let ary2st_opnd = asm.stack_opnd(0); + let ary1_opnd = asm.stack_opnd(1); // Call rb_vm_concat_array(ary1, ary2st) let ary = asm.ccall(rb_vm_concat_array as *const u8, vec![ary1_opnd, ary2st_opnd]); + asm.stack_pop(2); // Keep them on stack during ccall for GC + + let stack_ret = asm.stack_push(Type::TArray); + asm.mov(stack_ret, ary); + + Some(KeepCompiling) +} + +// concat second array to first array. +// first argument must already be an array. +// attempts to convert second object to array using to_a. +fn gen_concattoarray( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + // Save the PC and SP because the callee may call #to_a + jit_prepare_non_leaf_call(jit, asm); + + // Get the operands from the stack + let ary2_opnd = asm.stack_opnd(0); + let ary1_opnd = asm.stack_opnd(1); + + let ary = asm.ccall(rb_vm_concat_to_array as *const u8, vec![ary1_opnd, ary2_opnd]); + asm.stack_pop(2); // Keep them on stack during ccall for GC + + let stack_ret = asm.stack_push(Type::TArray); + asm.mov(stack_ret, ary); + + Some(KeepCompiling) +} + +// push given number of objects to array directly before. 
+fn gen_pushtoarray( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + let num = jit.get_arg(0).as_u64(); - let stack_ret = ctx.stack_push(Type::TArray); + // Save the PC and SP because the callee may allocate + jit_prepare_call_with_gc(jit, asm); + + // Get the operands from the stack + let ary_opnd = asm.stack_opnd(num as i32); + let objp_opnd = asm.lea(asm.ctx.sp_opnd(-(num as i32))); + + let ary = asm.ccall(rb_ary_cat as *const u8, vec![ary_opnd, objp_opnd, num.into()]); + asm.stack_pop(num as usize + 1); // Keep it on stack during ccall for GC + + let stack_ret = asm.stack_push(Type::TArray); asm.mov(stack_ret, ary); - KeepCompiling + Some(KeepCompiling) } // new range initialized from top 2 values fn gen_newrange( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let flag = jit_get_arg(jit, 0).as_usize(); +) -> Option<CodegenStatus> { + let flag = jit.get_arg(0).as_usize(); // rb_range_new() allocates and can raise - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_non_leaf_call(jit, asm); // val = rb_range_new(low, high, (int)flag); let range_opnd = asm.ccall( rb_range_new as *const u8, vec![ - ctx.stack_opnd(1), - ctx.stack_opnd(0), + asm.stack_opnd(1), + asm.stack_opnd(0), flag.into() ] ); - ctx.stack_pop(2); - let stack_ret = ctx.stack_push(Type::UnknownHeap); + asm.stack_pop(2); + let stack_ret = asm.stack_push(Type::UnknownHeap); asm.mov(stack_ret, range_opnd); - KeepCompiling + Some(KeepCompiling) } fn guard_object_is_heap( asm: &mut Assembler, - object_opnd: Opnd, - side_exit: Target, + object: Opnd, + object_opnd: YARVOpnd, + counter: Counter, ) { - asm.comment("guard object is heap"); + let object_type = asm.ctx.get_opnd_type(object_opnd); + if object_type.is_heap() { + return; + } + + asm_comment!(asm, "guard object is heap"); // Test that the object is not an immediate - asm.test(object_opnd, (RUBY_IMMEDIATE_MASK as u64).into()); - asm.jnz(side_exit); + asm.test(object, (RUBY_IMMEDIATE_MASK as u64).into()); + asm.jnz(Target::side_exit(counter)); // Test that the object is not false - asm.cmp(object_opnd, Qfalse.into()); - asm.je(side_exit); + asm.cmp(object, Qfalse.into()); + asm.je(Target::side_exit(counter)); + + if Type::UnknownHeap.diff(object_type) != TypeDiff::Incompatible { + asm.ctx.upgrade_opnd_type(object_opnd, Type::UnknownHeap); + } } fn guard_object_is_array( asm: &mut Assembler, - object_opnd: Opnd, - side_exit: Target, + object: Opnd, + object_opnd: YARVOpnd, + counter: Counter, ) { - asm.comment("guard object is array"); + let object_type = asm.ctx.get_opnd_type(object_opnd); + if object_type.is_array() { + return; + } + + let object_reg = match object { + Opnd::InsnOut { .. 
} => object, + _ => asm.load(object), + }; + guard_object_is_heap(asm, object_reg, object_opnd, counter); + + asm_comment!(asm, "guard object is array"); // Pull out the type mask - let flags_opnd = Opnd::mem(VALUE_BITS, object_opnd, RUBY_OFFSET_RBASIC_FLAGS); + let flags_opnd = Opnd::mem(VALUE_BITS, object_reg, RUBY_OFFSET_RBASIC_FLAGS); let flags_opnd = asm.and(flags_opnd, (RUBY_T_MASK as u64).into()); // Compare the result with T_ARRAY asm.cmp(flags_opnd, (RUBY_T_ARRAY as u64).into()); - asm.jne(side_exit); + asm.jne(Target::side_exit(counter)); + + if Type::TArray.diff(object_type) != TypeDiff::Incompatible { + asm.ctx.upgrade_opnd_type(object_opnd, Type::TArray); + } +} + +fn guard_object_is_hash( + asm: &mut Assembler, + object: Opnd, + object_opnd: YARVOpnd, + counter: Counter, +) { + let object_type = asm.ctx.get_opnd_type(object_opnd); + if object_type.is_hash() { + return; + } + + let object_reg = match object { + Opnd::InsnOut { .. } => object, + _ => asm.load(object), + }; + guard_object_is_heap(asm, object_reg, object_opnd, counter); + + asm_comment!(asm, "guard object is hash"); + + // Pull out the type mask + let flags_opnd = Opnd::mem(VALUE_BITS, object_reg, RUBY_OFFSET_RBASIC_FLAGS); + let flags_opnd = asm.and(flags_opnd, (RUBY_T_MASK as u64).into()); + + // Compare the result with T_HASH + asm.cmp(flags_opnd, (RUBY_T_HASH as u64).into()); + asm.jne(Target::side_exit(counter)); + + if Type::THash.diff(object_type) != TypeDiff::Incompatible { + asm.ctx.upgrade_opnd_type(object_opnd, Type::THash); + } +} + +fn guard_object_is_fixnum( + jit: &mut JITState, + asm: &mut Assembler, + object: Opnd, + object_opnd: YARVOpnd +) { + let object_type = asm.ctx.get_opnd_type(object_opnd); + if object_type.is_heap() { + asm_comment!(asm, "arg is heap object"); + asm.jmp(Target::side_exit(Counter::guard_send_not_fixnum)); + return; + } + + if object_type != Type::Fixnum && object_type.is_specific() { + asm_comment!(asm, "arg is not fixnum"); + asm.jmp(Target::side_exit(Counter::guard_send_not_fixnum)); + return; + } + + assert!(!object_type.is_heap()); + assert!(object_type == Type::Fixnum || object_type.is_unknown()); + + // If not fixnums at run-time, fall back + if object_type != Type::Fixnum { + asm_comment!(asm, "guard object fixnum"); + asm.test(object, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + + jit_chain_guard( + JCC_JZ, + jit, + asm, + SEND_MAX_DEPTH, + Counter::guard_send_not_fixnum, + ); + } + + // Set the stack type in the context. + asm.ctx.upgrade_opnd_type(object.into(), Type::Fixnum); +} + +fn guard_object_is_string( + asm: &mut Assembler, + object: Opnd, + object_opnd: YARVOpnd, + counter: Counter, +) { + let object_type = asm.ctx.get_opnd_type(object_opnd); + if object_type.is_string() { + return; + } + + let object_reg = match object { + Opnd::InsnOut { .. } => object, + _ => asm.load(object), + }; + guard_object_is_heap(asm, object_reg, object_opnd, counter); + + asm_comment!(asm, "guard object is string"); + + // Pull out the type mask + let flags_reg = asm.load(Opnd::mem(VALUE_BITS, object_reg, RUBY_OFFSET_RBASIC_FLAGS)); + let flags_reg = asm.and(flags_reg, Opnd::UImm(RUBY_T_MASK as u64)); + + // Compare the result with T_STRING + asm.cmp(flags_reg, Opnd::UImm(RUBY_T_STRING as u64)); + asm.jne(Target::side_exit(counter)); + + if Type::TString.diff(object_type) != TypeDiff::Incompatible { + asm.ctx.upgrade_opnd_type(object_opnd, Type::TString); + } } /// This guards that a special flag is not set on a hash. 
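+/// The flag in question is RHASH_PASS_AS_KEYWORDS, which marks a ruby2_keywords hash.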
@@ -1333,9 +2199,9 @@ fn guard_object_is_array( fn guard_object_is_not_ruby2_keyword_hash( asm: &mut Assembler, object_opnd: Opnd, - side_exit: Target, + counter: Counter, ) { - asm.comment("guard object is not ruby2 keyword hash"); + asm_comment!(asm, "guard object is not ruby2 keyword hash"); let not_ruby2_keyword = asm.new_label("not_ruby2_keyword"); asm.test(object_opnd, (RUBY_IMMEDIATE_MASK as u64).into()); @@ -1355,159 +2221,168 @@ fn guard_object_is_not_ruby2_keyword_hash( asm.jne(not_ruby2_keyword); asm.test(flags_opnd, (RHASH_PASS_AS_KEYWORDS as u64).into()); - asm.jnz(side_exit); + asm.jnz(Target::side_exit(counter)); asm.write_label(not_ruby2_keyword); } -fn guard_object_is_string( - asm: &mut Assembler, - object_reg: Opnd, - side_exit: Target, -) { - asm.comment("guard object is string"); - - // Pull out the type mask - let flags_reg = asm.load(Opnd::mem(VALUE_BITS, object_reg, RUBY_OFFSET_RBASIC_FLAGS)); - let flags_reg = asm.and(flags_reg, Opnd::UImm(RUBY_T_MASK as u64)); - - // Compare the result with T_STRING - asm.cmp(flags_reg, Opnd::UImm(RUBY_T_STRING as u64)); - asm.jne(side_exit); -} - -// push enough nils onto the stack to fill out an array +/// This instruction pops a single value off the stack, converts it to an +/// array (using the #to_ary method) if it isn't already one, and then pushes +/// the values from the array back onto the stack. fn gen_expandarray( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Both arguments are rb_num_t which is unsigned - let num = jit_get_arg(jit, 0).as_usize(); - let flag = jit_get_arg(jit, 1).as_usize(); + let num = jit.get_arg(0).as_u32(); + let flag = jit.get_arg(1).as_usize(); // If this instruction has the splat flag, then bail out. if flag & 0x01 != 0 { - gen_counter_incr!(asm, expandarray_splat); - return CantCompile; + gen_counter_incr(jit, asm, Counter::expandarray_splat); + return None; } // If this instruction has the postarg flag, then bail out. if flag & 0x02 != 0 { - gen_counter_incr!(asm, expandarray_postarg); - return CantCompile; + gen_counter_incr(jit, asm, Counter::expandarray_postarg); + return None; } - let side_exit = get_side_exit(jit, ocb, ctx); + let array_opnd = asm.stack_opnd(0); - let array_type = ctx.get_opnd_type(StackOpnd(0)); - let array_opnd = ctx.stack_pop(1); + // Defer compilation so we can specialize on a runtime `self` + if !jit.at_compile_target() { + return jit.defer_compilation(asm); + } - // num is the number of requested values. If there aren't enough in the - // array then we're going to push on nils. - if matches!(array_type, Type::Nil) { - // special case for a, b = nil pattern - // push N nils onto the stack - for _ in 0..num { - let push_opnd = ctx.stack_push(Type::Nil); + let comptime_recv = jit.peek_at_stack(&asm.ctx, 0); + + // If the comptime receiver is not an array, speculate for when the `rb_check_array_type()` + // conversion returns nil without side-effects (e.g. arbitrary method calls).
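+ // When that speculation holds, the receiver behaves like a one-element array: the object itself plus Qnil for any remaining slots (see the nil-pushing loop below).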
+ if !unsafe { RB_TYPE_P(comptime_recv, RUBY_T_ARRAY) } { + // at compile time, ensure to_ary is not defined + let target_cme = unsafe { rb_callable_method_entry_or_negative(comptime_recv.class_of(), ID!(to_ary)) }; + let cme_def_type = unsafe { get_cme_def_type(target_cme) }; + + // if to_ary is defined, return can't compile so to_ary can be called + if cme_def_type != VM_METHOD_TYPE_UNDEF { + gen_counter_incr(jit, asm, Counter::expandarray_to_ary); + return None; + } + + // Bail when method_missing is defined to avoid generating code to call it. + // Also, for simplicity, bail when BasicObject#method_missing has been removed. + if !assume_method_basic_definition(jit, asm, comptime_recv.class_of(), ID!(method_missing)) { + gen_counter_incr(jit, asm, Counter::expandarray_method_missing); + return None; + } + + // invalidate compile block if to_ary is later defined + jit.assume_method_lookup_stable(asm, target_cme); + + jit_guard_known_klass( + jit, + asm, + array_opnd, + array_opnd.into(), + comptime_recv, + SEND_MAX_DEPTH, + Counter::expandarray_not_array, + ); + + let opnd = asm.stack_pop(1); // pop after using the type info + + // If we don't actually want any values, then just keep going + if num == 0 { + return Some(KeepCompiling); + } + + // load opnd to avoid a race because we are also pushing onto the stack + let opnd = asm.load(opnd); + + for _ in 1..num { + let push_opnd = asm.stack_push(Type::Nil); asm.mov(push_opnd, Qnil.into()); } - return KeepCompiling; + + let push_opnd = asm.stack_push(Type::Unknown); + asm.mov(push_opnd, opnd); + + return Some(KeepCompiling); } + // Get the compile-time array length + let comptime_len = unsafe { rb_jit_array_len(comptime_recv) as u32 }; + // Move the array from the stack and check that it's an array. - let array_reg = asm.load(array_opnd); - guard_object_is_heap( - asm, - array_reg, - counted_exit!(ocb, side_exit, expandarray_not_array), - ); guard_object_is_array( asm, - array_reg, - counted_exit!(ocb, side_exit, expandarray_not_array), + array_opnd, + array_opnd.into(), + Counter::expandarray_not_array, ); // If we don't actually want any values, then just return. if num == 0 { - return KeepCompiling; + asm.stack_pop(1); // pop the array + return Some(KeepCompiling); } - // Pull out the embed flag to check if it's an embedded array. - let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS); - - // Move the length of the embedded array into REG1. - let emb_len_opnd = asm.and(flags_opnd, (RARRAY_EMBED_LEN_MASK as u64).into()); - let emb_len_opnd = asm.rshift(emb_len_opnd, (RARRAY_EMBED_LEN_SHIFT as u64).into()); - - // Conditionally move the length of the heap array into REG1. - let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS); - asm.test(flags_opnd, (RARRAY_EMBED_FLAG as u64).into()); - let array_len_opnd = Opnd::mem( - (8 * size_of::<std::os::raw::c_long>()) as u8, - asm.load(array_opnd), - RUBY_OFFSET_RARRAY_AS_HEAP_LEN, - ); - let array_len_opnd = asm.csel_nz(emb_len_opnd, array_len_opnd); - - // Only handle the case where the number of values in the array is greater - // than or equal to the number of values requested. - asm.cmp(array_len_opnd, num.into()); - asm.jl(counted_exit!(ocb, side_exit, expandarray_rhs_too_small)); - - // Load the address of the embedded array into REG1. 
- // (struct RArray *)(obj)->as.ary + let array_opnd = asm.stack_opnd(0); let array_reg = asm.load(array_opnd); - let ary_opnd = asm.lea(Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RARRAY_AS_ARY)); + let array_len_opnd = get_array_len(asm, array_reg); - // Conditionally load the address of the heap array into REG1. - // (struct RArray *)(obj)->as.heap.ptr - let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS); - asm.test(flags_opnd, Opnd::UImm(RARRAY_EMBED_FLAG as u64)); - let heap_ptr_opnd = Opnd::mem( - (8 * size_of::<usize>()) as u8, - asm.load(array_opnd), - RUBY_OFFSET_RARRAY_AS_HEAP_PTR, - ); - let ary_opnd = asm.csel_nz(ary_opnd, heap_ptr_opnd); + // Guard on the comptime/expected array length + if comptime_len >= num { + asm_comment!(asm, "guard array length >= {}", num); + asm.cmp(array_len_opnd, num.into()); + jit_chain_guard( + JCC_JB, + jit, + asm, + EXPANDARRAY_MAX_CHAIN_DEPTH, + Counter::expandarray_chain_max_depth, + ); - // Loop backward through the array and push each element onto the stack. - for i in (0..num).rev() { - let top = ctx.stack_push(Type::Unknown); - let offset = i32::try_from(i * SIZEOF_VALUE).unwrap(); - asm.mov(top, Opnd::mem(64, ary_opnd, offset)); + } else { + asm_comment!(asm, "guard array length == {}", comptime_len); + asm.cmp(array_len_opnd, comptime_len.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + EXPANDARRAY_MAX_CHAIN_DEPTH, + Counter::expandarray_chain_max_depth, + ); } - KeepCompiling -} + let array_opnd = asm.stack_pop(1); // pop after using the type info -fn gen_getlocal_wc0( - jit: &mut JITState, - ctx: &mut Context, - asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - // Compute the offset from BP to the local - let slot_idx = jit_get_arg(jit, 0).as_i32(); - let offs: i32 = -SIZEOF_VALUE_I32 * slot_idx; - let local_idx = slot_to_local_idx(jit.get_iseq(), slot_idx); - - // Load environment pointer EP (level 0) from CFP - let ep_opnd = gen_get_ep(asm, 0); + // Load the pointer to the embedded or heap array + let ary_opnd = if comptime_len > 0 { + let array_reg = asm.load(array_opnd); + Some(get_array_ptr(asm, array_reg)) + } else { + None + }; - // Load the local from the EP - let local_opnd = Opnd::mem(64, ep_opnd, offs); + // Loop backward through the array and push each element onto the stack. + for i in (0..num).rev() { + let top = asm.stack_push(if i < comptime_len { Type::Unknown } else { Type::Nil }); + let offset = i32::try_from(i * (SIZEOF_VALUE as u32)).unwrap(); - // Write the local at SP - let stack_top = ctx.stack_push_local(local_idx.as_usize()); - asm.mov(stack_top, local_opnd); + // Missing elements are Qnil + asm_comment!(asm, "load array[{}]", i); + let elem_opnd = if i < comptime_len { Opnd::mem(64, ary_opnd.unwrap(), offset) } else { Qnil.into() }; + asm.mov(top, elem_opnd); + } - KeepCompiling + Some(KeepCompiling) } // Compute the index of a local variable from its slot index -fn slot_to_local_idx(iseq: IseqPtr, slot_idx: i32) -> u32 { +fn ep_offset_to_local_idx(iseq: IseqPtr, ep_offset: u32) -> u32 { // Layout illustration // This is an array of VALUE // | VM_ENV_DATA_SIZE | @@ -1518,7 +2393,7 @@ fn slot_to_local_idx(iseq: IseqPtr, slot_idx: i32) -> u32 { // ^ ^ ^ ^ // +-------+---local_table_size----+ cfp->ep--+ // | | - // +------------------slot_idx----------------+ + // +------------------ep_offset---------------+ // // See usages of local_var_name() from iseq.c for similar calculation. 
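+ // For example, if local_table_size == 3 and VM_ENV_DATA_SIZE == 3, then ep_offset == 5 maps to local_idx == 3 - (5 - 3) - 1 == 0, i.e. the first local in the table.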
@@ -1526,7 +2401,7 @@ fn slot_to_local_idx(iseq: IseqPtr, slot_idx: i32) -> u32 { let local_table_size: i32 = unsafe { get_iseq_body_local_table_size(iseq) } .try_into() .unwrap(); - let op = slot_idx - (VM_ENV_DATA_SIZE as i32); + let op = (ep_offset - VM_ENV_DATA_SIZE) as i32; let local_idx = local_table_size - op - 1; assert!(local_idx >= 0 && local_idx < local_table_size); local_idx.try_into().unwrap() @@ -1552,7 +2427,7 @@ fn gen_get_ep(asm: &mut Assembler, level: u32) -> Opnd { // Gets the EP of the ISeq of the containing method, or "local level". // Equivalent of GET_LEP() macro. -fn gen_get_lep(jit: &mut JITState, asm: &mut Assembler) -> Opnd { +fn gen_get_lep(jit: &JITState, asm: &mut Assembler) -> Opnd { // Equivalent of get_lvar_level() in compile.c fn get_lvar_level(iseq: IseqPtr) -> u32 { if iseq == unsafe { rb_get_iseq_body_local_iseq(iseq) } { @@ -1567,180 +2442,210 @@ fn gen_get_lep(jit: &mut JITState, asm: &mut Assembler) -> Opnd { } fn gen_getlocal_generic( - ctx: &mut Context, + jit: &mut JITState, asm: &mut Assembler, - local_idx: u32, + ep_offset: u32, level: u32, -) -> CodegenStatus { - // Load environment pointer EP (level 0) from CFP - let ep_opnd = gen_get_ep(asm, level); +) -> Option<CodegenStatus> { + // Split the block if we need to invalidate this instruction when EP escapes + if level == 0 && !jit.escapes_ep() && !jit.at_compile_target() { + return jit.defer_compilation(asm); + } - // Load the local from the block - // val = *(vm_get_ep(GET_EP(), level) - idx); - let offs = -(SIZEOF_VALUE_I32 * local_idx as i32); - let local_opnd = Opnd::mem(64, ep_opnd, offs); + let local_opnd = if level == 0 && jit.assume_no_ep_escape(asm) { + // Load the local using SP register + asm.local_opnd(ep_offset) + } else { + // Load environment pointer EP (level 0) from CFP + let ep_opnd = gen_get_ep(asm, level); + + // Load the local from the block + // val = *(vm_get_ep(GET_EP(), level) - idx); + let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32); + let local_opnd = Opnd::mem(64, ep_opnd, offs); + + // Write back an argument register to the stack. If the local variable + // is an argument, it might have an allocated register, but if this ISEQ + // is known to escape EP, the register shouldn't be used after this getlocal. 
+ if level == 0 && asm.ctx.get_reg_mapping().get_reg(asm.local_opnd(ep_offset).reg_opnd()).is_some() { + asm.mov(local_opnd, asm.local_opnd(ep_offset)); + } + + local_opnd + }; // Write the local at SP - let stack_top = ctx.stack_push(Type::Unknown); + let stack_top = if level == 0 { + let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset); + asm.stack_push_local(local_idx.as_usize()) + } else { + asm.stack_push(Type::Unknown) + }; asm.mov(stack_top, local_opnd); - KeepCompiling + Some(KeepCompiling) } fn gen_getlocal( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let idx = jit_get_arg(jit, 0); - let level = jit_get_arg(jit, 1); - gen_getlocal_generic(ctx, asm, idx.as_u32(), level.as_u32()) +) -> Option<CodegenStatus> { + let idx = jit.get_arg(0).as_u32(); + let level = jit.get_arg(1).as_u32(); + gen_getlocal_generic(jit, asm, idx, level) +} + +fn gen_getlocal_wc0( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + let idx = jit.get_arg(0).as_u32(); + gen_getlocal_generic(jit, asm, idx, 0) } fn gen_getlocal_wc1( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let idx = jit_get_arg(jit, 0); - gen_getlocal_generic(ctx, asm, idx.as_u32(), 1) +) -> Option<CodegenStatus> { + let idx = jit.get_arg(0).as_u32(); + gen_getlocal_generic(jit, asm, idx, 1) } -fn gen_setlocal_wc0( +fn gen_setlocal_generic( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - /* - vm_env_write(const VALUE *ep, int index, VALUE v) - { - VALUE flags = ep[VM_ENV_DATA_INDEX_FLAGS]; - if (LIKELY((flags & VM_ENV_FLAG_WB_REQUIRED) == 0)) { - VM_STACK_ENV_WRITE(ep, index, v); - } - else { - vm_env_write_slowpath(ep, index, v); + ep_offset: u32, + level: u32, +) -> Option<CodegenStatus> { + // Post condition: The type of the set local is updated in the Context. + let value_type = asm.ctx.get_opnd_type(StackOpnd(0)); + + // Fallback because of write barrier + if asm.ctx.get_chain_depth() > 0 { + // Load environment pointer EP at level + let ep_opnd = gen_get_ep(asm, level); + + // This function should not yield to the GC.
+ // void rb_vm_env_write(const VALUE *ep, int index, VALUE v) + let index = -(ep_offset as i64); + let value_opnd = asm.stack_opnd(0); + asm.ccall( + rb_vm_env_write as *const u8, + vec![ + ep_opnd, + index.into(), + value_opnd, + ] + ); + asm.stack_pop(1); + + // Set local type in the context + if level == 0 { + let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset).as_usize(); + asm.ctx.set_local_type(local_idx, value_type); } + return Some(KeepCompiling); } - */ - let slot_idx = jit_get_arg(jit, 0).as_i32(); - let local_idx = slot_to_local_idx(jit.get_iseq(), slot_idx).as_usize(); - let value_type = ctx.get_opnd_type(StackOpnd(0)); + // Split the block if we need to invalidate this instruction when EP escapes + if level == 0 && !jit.escapes_ep() && !jit.at_compile_target() { + return jit.defer_compilation(asm); + } - // Load environment pointer EP (level 0) from CFP - let ep_opnd = gen_get_ep(asm, 0); + let (flags_opnd, local_opnd) = if level == 0 && jit.assume_no_ep_escape(asm) { + // Load flags and the local using SP register + let flags_opnd = asm.ctx.ep_opnd(VM_ENV_DATA_INDEX_FLAGS as i32); + let local_opnd = asm.local_opnd(ep_offset); - // Write barriers may be required when VM_ENV_FLAG_WB_REQUIRED is set, however write barriers - // only affect heap objects being written. If we know an immediate value is being written we - // can skip this check. - if !value_type.is_imm() { - // flags & VM_ENV_FLAG_WB_REQUIRED + // Allocate a register to the new local operand + asm.alloc_reg(local_opnd.reg_opnd()); + (flags_opnd, local_opnd) + } else { + // Make sure getlocal doesn't read a stale register. If the local variable + // is an argument, it might have an allocated register, but if this ISEQ + // is known to escape EP, the register shouldn't be used after this setlocal. + if level == 0 { + asm.ctx.dealloc_reg(asm.local_opnd(ep_offset).reg_opnd()); + } + + // Load flags and the local for the level + let ep_opnd = gen_get_ep(asm, level); let flags_opnd = Opnd::mem( 64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_FLAGS as i32, ); - asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into()); - - // Create a side-exit to fall back to the interpreter - let side_exit = get_side_exit(jit, ocb, ctx); - - // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0 - asm.jnz(side_exit); - } - - // Set the type of the local variable in the context - ctx.set_local_type(local_idx, value_type); - - // Pop the value to write from the stack - let stack_top = ctx.stack_pop(1); - - // Write the value at the environment pointer - let offs: i32 = -8 * slot_idx; - asm.mov(Opnd::mem(64, ep_opnd, offs), stack_top); - - KeepCompiling -} - -fn gen_setlocal_generic( - jit: &mut JITState, - ctx: &mut Context, - asm: &mut Assembler, - ocb: &mut OutlinedCb, - local_idx: i32, - level: u32, -) -> CodegenStatus { - let value_type = ctx.get_opnd_type(StackOpnd(0)); - - // Load environment pointer EP at level - let ep_opnd = gen_get_ep(asm, level); + (flags_opnd, Opnd::mem(64, ep_opnd, -SIZEOF_VALUE_I32 * ep_offset as i32)) + }; // Write barriers may be required when VM_ENV_FLAG_WB_REQUIRED is set, however write barriers // only affect heap objects being written. If we know an immediate value is being written we // can skip this check. 
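+ // (Immediate values are never managed by the GC, so storing one never requires a write barrier.)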
if !value_type.is_imm() { // flags & VM_ENV_FLAG_WB_REQUIRED - let flags_opnd = Opnd::mem( - 64, - ep_opnd, - SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_FLAGS as i32, - ); asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into()); - // Create a side-exit to fall back to the interpreter - let side_exit = get_side_exit(jit, ocb, ctx); - // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0 - asm.jnz(side_exit); + assert!(asm.ctx.get_chain_depth() == 0); + jit_chain_guard( + JCC_JNZ, + jit, + asm, + 1, + Counter::setlocal_wb_required, + ); + } + + // Set local type in the context + if level == 0 { + let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset).as_usize(); + asm.ctx.set_local_type(local_idx, value_type); } // Pop the value to write from the stack - let stack_top = ctx.stack_pop(1); + let stack_top = asm.stack_pop(1); // Write the value at the environment pointer - let offs = -(SIZEOF_VALUE_I32 * local_idx); - asm.mov(Opnd::mem(64, ep_opnd, offs), stack_top); + asm.mov(local_opnd, stack_top); - KeepCompiling + Some(KeepCompiling) } fn gen_setlocal( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - let idx = jit_get_arg(jit, 0).as_i32(); - let level = jit_get_arg(jit, 1).as_u32(); - gen_setlocal_generic(jit, ctx, asm, ocb, idx, level) +) -> Option<CodegenStatus> { + let idx = jit.get_arg(0).as_u32(); + let level = jit.get_arg(1).as_u32(); + gen_setlocal_generic(jit, asm, idx, level) +} + +fn gen_setlocal_wc0( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + let idx = jit.get_arg(0).as_u32(); + gen_setlocal_generic(jit, asm, idx, 0) } fn gen_setlocal_wc1( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - let idx = jit_get_arg(jit, 0).as_i32(); - gen_setlocal_generic(jit, ctx, asm, ocb, idx, 1) +) -> Option<CodegenStatus> { + let idx = jit.get_arg(0).as_u32(); + gen_setlocal_generic(jit, asm, idx, 1) } // new hash initialized from top N values fn gen_newhash( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let num: u64 = jit_get_arg(jit, 0).as_u64(); +) -> Option<CodegenStatus> { + let num: u64 = jit.get_arg(0).as_u64(); // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_call_with_gc(jit, asm); if num != 0 { // val = rb_hash_new_with_size(num / 2); @@ -1754,7 +2659,7 @@ fn gen_newhash( asm.cpush(new_hash); // x86 alignment // Get a pointer to the values to insert into the hash - let stack_addr_from_top = asm.lea(ctx.stack_opnd((num - 1) as i32)); + let stack_addr_from_top = asm.lea(asm.stack_opnd((num - 1) as i32)); // rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val); asm.ccall( @@ -1769,66 +2674,109 @@ fn gen_newhash( let new_hash = asm.cpop(); asm.cpop_into(new_hash); // x86 alignment - ctx.stack_pop(num.try_into().unwrap()); - let stack_ret = ctx.stack_push(Type::Hash); + asm.stack_pop(num.try_into().unwrap()); + let stack_ret = asm.stack_push(Type::CHash); asm.mov(stack_ret, new_hash); } else { // val = rb_hash_new(); let new_hash = asm.ccall(rb_hash_new as *const u8, vec![]); - let stack_ret = ctx.stack_push(Type::Hash); + let stack_ret = asm.stack_push(Type::CHash); asm.mov(stack_ret, new_hash); } - KeepCompiling + Some(KeepCompiling) } fn gen_putstring( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let put_val = jit_get_arg(jit, 0); +) -> 
Option<CodegenStatus> { + let put_val = jit.get_arg(0); // Save the PC and SP because the callee will allocate - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_call_with_gc(jit, asm); let str_opnd = asm.ccall( rb_ec_str_resurrect as *const u8, - vec![EC, put_val.into()] + vec![EC, put_val.into(), 0.into()] ); - let stack_top = ctx.stack_push(Type::CString); + let stack_top = asm.stack_push(Type::CString); asm.mov(stack_top, str_opnd); - KeepCompiling + Some(KeepCompiling) +} + +fn gen_putchilledstring( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + let put_val = jit.get_arg(0); + + // Save the PC and SP because the callee will allocate + jit_prepare_call_with_gc(jit, asm); + + let str_opnd = asm.ccall( + rb_ec_str_resurrect as *const u8, + vec![EC, put_val.into(), 1.into()] + ); + + let stack_top = asm.stack_push(Type::CString); + asm.mov(stack_top, str_opnd); + + Some(KeepCompiling) +} + +fn gen_checkmatch( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + let flag = jit.get_arg(0).as_u32(); + + // rb_vm_check_match is not leaf unless flag is VM_CHECKMATCH_TYPE_WHEN. + // See also: leafness_of_checkmatch() and check_match() + if flag != VM_CHECKMATCH_TYPE_WHEN { + jit_prepare_non_leaf_call(jit, asm); + } + + let pattern = asm.stack_opnd(0); + let target = asm.stack_opnd(1); + + extern "C" { + fn rb_vm_check_match(ec: EcPtr, target: VALUE, pattern: VALUE, num: u32) -> VALUE; + } + let result = asm.ccall(rb_vm_check_match as *const u8, vec![EC, target, pattern, flag.into()]); + asm.stack_pop(2); // Keep them on stack during ccall for GC + + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, result); + + Some(KeepCompiling) } // Push Qtrue or Qfalse depending on whether the given keyword was supplied by // the caller fn gen_checkkeyword( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // When a keyword is unspecified past index 32, a hash will be used // instead. This can only happen in iseqs taking more than 32 keywords. - if unsafe { (*get_iseq_body_param_keyword(jit.iseq)).num >= 32 } { - return CantCompile; + if unsafe { (*get_iseq_body_param_keyword(jit.iseq)).num >= VM_KW_SPECIFIED_BITS_MAX.try_into().unwrap() } { + return None; } // The EP offset to the undefined bits local - let bits_offset = jit_get_arg(jit, 0).as_i32(); + let bits_offset = jit.get_arg(0).as_i32(); // The index of the keyword we want to check - let index: i64 = jit_get_arg(jit, 1).as_i64(); - - // Load environment pointer EP - let ep_opnd = gen_get_ep(asm, 0); + let index: i64 = jit.get_arg(1).as_i64(); - // VALUE kw_bits = *(ep - bits); - let bits_opnd = Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * -bits_offset); + // `unspecified_bits` is a part of the local table. Therefore, we may allocate a register for + // that "local" when passing it as an argument. We must use such a register to avoid loading + // random bits from the stack if any. We assume that EP is not escaped as of entering a method + // with keyword arguments. 
+ let bits_opnd = asm.local_opnd(bits_offset as u32); // unsigned int b = (unsigned int)FIX2ULONG(kw_bits); // if ((b & (0x01 << idx))) { @@ -1838,142 +2786,63 @@ fn gen_checkkeyword( asm.test(bits_opnd, Opnd::Imm(bit_test)); let ret_opnd = asm.csel_z(Qtrue.into(), Qfalse.into()); - let stack_ret = ctx.stack_push(Type::UnknownImm); + let stack_ret = asm.stack_push(Type::UnknownImm); asm.mov(stack_ret, ret_opnd); - KeepCompiling -} - -fn gen_jnz_to_target0( - asm: &mut Assembler, - target0: CodePtr, - _target1: Option<CodePtr>, - shape: BranchShape, -) { - match shape { - BranchShape::Next0 | BranchShape::Next1 => unreachable!(), - BranchShape::Default => asm.jnz(target0.into()), - } -} - -fn gen_jz_to_target0( - asm: &mut Assembler, - target0: CodePtr, - _target1: Option<CodePtr>, - shape: BranchShape, -) { - match shape { - BranchShape::Next0 | BranchShape::Next1 => unreachable!(), - BranchShape::Default => asm.jz(Target::CodePtr(target0)), - } -} - -fn gen_jbe_to_target0( - asm: &mut Assembler, - target0: CodePtr, - _target1: Option<CodePtr>, - shape: BranchShape, -) { - match shape { - BranchShape::Next0 | BranchShape::Next1 => unreachable!(), - BranchShape::Default => asm.jbe(Target::CodePtr(target0)), - } + Some(KeepCompiling) } // Generate a jump to a stub that recompiles the current YARV instruction on failure. // When depth_limit is exceeded, generate a jump to a side exit. fn jit_chain_guard( jcc: JCCKinds, - jit: &JITState, - ctx: &Context, + jit: &mut JITState, asm: &mut Assembler, - ocb: &mut OutlinedCb, - depth_limit: i32, - side_exit: Target, + depth_limit: u8, + counter: Counter, ) { let target0_gen_fn = match jcc { - JCC_JNE | JCC_JNZ => gen_jnz_to_target0, - JCC_JZ | JCC_JE => gen_jz_to_target0, - JCC_JBE | JCC_JNA => gen_jbe_to_target0, + JCC_JNE | JCC_JNZ => BranchGenFn::JNZToTarget0, + JCC_JZ | JCC_JE => BranchGenFn::JZToTarget0, + JCC_JBE | JCC_JNA => BranchGenFn::JBEToTarget0, + JCC_JB | JCC_JNAE => BranchGenFn::JBToTarget0, + JCC_JO_MUL => BranchGenFn::JOMulToTarget0, }; - if (ctx.get_chain_depth() as i32) < depth_limit { - let mut deeper = ctx.clone(); + if asm.ctx.get_chain_depth() < depth_limit { + // Rewind Context to use the stack_size at the beginning of this instruction. 
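+ // That way, the chained retry block starts this instruction again with the stack size it had on entry.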
+ let mut deeper = asm.ctx.with_stack_size(jit.stack_size_for_pc); deeper.increment_chain_depth(); let bid = BlockId { iseq: jit.iseq, idx: jit.insn_idx, }; - gen_branch(jit, asm, ocb, bid, &deeper, None, None, target0_gen_fn); + jit.gen_branch(asm, bid, &deeper, None, None, target0_gen_fn); } else { - target0_gen_fn(asm, side_exit.unwrap_code_ptr(), None, BranchShape::Default); + target0_gen_fn.call(asm, Target::side_exit(counter), None); } } -// up to 5 different classes, and embedded or not for each -pub const GET_IVAR_MAX_DEPTH: i32 = 10; +// up to 8 different shapes for each +pub const GET_IVAR_MAX_DEPTH: u8 = 8; -// up to 5 different classes, and embedded or not for each -pub const SET_IVAR_MAX_DEPTH: i32 = 10; +// up to 8 different shapes for each +pub const SET_IVAR_MAX_DEPTH: u8 = 8; // hashes and arrays -pub const OPT_AREF_MAX_CHAIN_DEPTH: i32 = 2; +pub const OPT_AREF_MAX_CHAIN_DEPTH: u8 = 2; -// up to 5 different classes -pub const SEND_MAX_DEPTH: i32 = 5; +// expandarray +pub const EXPANDARRAY_MAX_CHAIN_DEPTH: u8 = 4; -// up to 20 different methods for send -pub const SEND_MAX_CHAIN_DEPTH: i32 = 20; +// up to 5 different methods for send +pub const SEND_MAX_DEPTH: u8 = 5; // up to 20 different offsets for case-when -pub const CASE_WHEN_MAX_DEPTH: i32 = 20; - -// Codegen for setting an instance variable. -// Preconditions: -// - receiver is in REG0 -// - receiver has the same class as CLASS_OF(comptime_receiver) -// - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled -fn gen_set_ivar( - jit: &mut JITState, - ctx: &mut Context, - asm: &mut Assembler, - _recv: VALUE, - ivar_name: ID, - flags: u32, - argc: i32, -) -> CodegenStatus { - - // This is a .send call and we need to adjust the stack - if flags & VM_CALL_OPT_SEND != 0 { - handle_opt_send_shift_stack(asm, argc, ctx); - } - - // Save the PC and SP because the callee may allocate - // Note that this modifies REG_SP, which is why we do it first - jit_prepare_routine_call(jit, ctx, asm); - - // Get the operands from the stack - let val_opnd = ctx.stack_pop(1); - let recv_opnd = ctx.stack_pop(1); - - // Call rb_vm_set_ivar_id with the receiver, the ivar name, and the value - let val = asm.ccall( - rb_vm_set_ivar_id as *const u8, - vec![ - recv_opnd, - Opnd::UImm(ivar_name), - val_opnd, - ], - ); - - let out_opnd = ctx.stack_push(Type::Unknown); - asm.mov(out_opnd, val); - - KeepCompiling -} - +pub const CASE_WHEN_MAX_DEPTH: u8 = 20; +pub const MAX_SPLAT_LENGTH: i32 = 127; // Codegen for getting an instance variable. // Preconditions: @@ -1981,191 +2850,156 @@ fn gen_set_ivar( // - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled fn gen_get_ivar( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, - max_chain_depth: i32, + max_chain_depth: u8, comptime_receiver: VALUE, ivar_name: ID, recv: Opnd, recv_opnd: YARVOpnd, - side_exit: Target, -) -> CodegenStatus { - // If the object has a too complex shape, we exit - if comptime_receiver.shape_too_complex() { - return CantCompile; - } - - let comptime_val_klass = comptime_receiver.class_of(); - let starting_context = ctx.clone(); // make a copy for use with jit_chain_guard - +) -> Option<CodegenStatus> { // If recv isn't already a register, load it. let recv = match recv { - Opnd::Reg(_) => recv, + Opnd::InsnOut { .. 
} => recv, _ => asm.load(recv), }; - // Check if the comptime class uses a custom allocator - let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) }; - let uses_custom_allocator = match custom_allocator { - Some(alloc_fun) => { - let allocate_instance = rb_class_allocate_instance as *const u8; - alloc_fun as *const u8 != allocate_instance - } - None => false, - }; - // Check if the comptime receiver is a T_OBJECT let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) }; + // Use a general C call at the last chain to avoid exits on megamorphic shapes + let megamorphic = asm.ctx.get_chain_depth() >= max_chain_depth; + if megamorphic { + gen_counter_incr(jit, asm, Counter::num_getivar_megamorphic); + } - // If the class uses the default allocator, instances should all be T_OBJECT - // NOTE: This assumes nobody changes the allocator of the class after allocation. - // Eventually, we can encode whether an object is T_OBJECT or not - // inside object shapes. - if !receiver_t_object || uses_custom_allocator { + // NOTE: This assumes T_OBJECT can't ever have the same shape_id as any other type. + // too-complex shapes can't use index access, so we use rb_ivar_get for them too. + if !comptime_receiver.heap_object_p() || comptime_receiver.shape_too_complex() || megamorphic { // General case. Call rb_ivar_get(). // VALUE rb_ivar_get(VALUE obj, ID id) - asm.comment("call rb_ivar_get()"); + asm_comment!(asm, "call rb_ivar_get()"); - // The function could raise exceptions. - jit_prepare_routine_call(jit, ctx, asm); + // The function could raise RactorIsolationError. + jit_prepare_non_leaf_call(jit, asm); let ivar_val = asm.ccall(rb_ivar_get as *const u8, vec![recv, Opnd::UImm(ivar_name)]); if recv_opnd != SelfOpnd { - ctx.stack_pop(1); + asm.stack_pop(1); } // Push the ivar on the stack - let out_opnd = ctx.stack_push(Type::Unknown); + let out_opnd = asm.stack_push(Type::Unknown); asm.mov(out_opnd, ivar_val); // Jump to next instruction. This allows guard chains to share the same successor. 
- jump_to_next_insn(jit, ctx, asm, ocb); - return EndBlock; + jump_to_next_insn(jit, asm); + return Some(EndBlock); } let ivar_index = unsafe { let shape_id = comptime_receiver.shape_id_of(); - let shape = rb_shape_get_shape_by_id(shape_id); - let mut ivar_index: u32 = 0; - if rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) { + let mut ivar_index: u16 = 0; + if rb_shape_get_iv_index(shape_id, ivar_name, &mut ivar_index) { Some(ivar_index as usize) } else { None } }; - // must be before stack_pop - let recv_type = ctx.get_opnd_type(recv_opnd); - - // Upgrade type - if !recv_type.is_heap() { - ctx.upgrade_opnd_type(recv_opnd, Type::UnknownHeap); - } + // Guard heap object (recv_opnd must be used before stack_pop) + guard_object_is_heap(asm, recv, recv_opnd, Counter::getivar_not_heap); - // Pop receiver if it's on the temp stack - if recv_opnd != SelfOpnd { - ctx.stack_pop(1); - } - - // Guard heap object - if !recv_type.is_heap() { - guard_object_is_heap(asm, recv, side_exit); - } - - // Compile time self is embedded and the ivar index lands within the object - let embed_test_result = unsafe { FL_TEST_RAW(comptime_receiver, VALUE(ROBJECT_EMBED.as_usize())) != VALUE(0) }; - - let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) }; + let expected_shape = unsafe { rb_obj_shape_id(comptime_receiver) }; let shape_id_offset = unsafe { rb_shape_id_offset() }; let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset); - asm.comment("guard shape"); + asm_comment!(asm, "guard shape"); asm.cmp(shape_opnd, Opnd::UImm(expected_shape as u64)); - let megamorphic_side_exit = counted_exit!(ocb, side_exit, getivar_megamorphic); jit_chain_guard( JCC_JNE, jit, - &starting_context, asm, - ocb, max_chain_depth, - megamorphic_side_exit, + Counter::getivar_megamorphic, ); + // Pop receiver if it's on the temp stack + if recv_opnd != SelfOpnd { + asm.stack_pop(1); + } + match ivar_index { // If there is no IVAR index, then the ivar was undefined // when we entered the compiler. That means we can just return // nil for this shape + iv name None => { - let out_opnd = ctx.stack_push(Type::Nil); + let out_opnd = asm.stack_push(Type::Nil); asm.mov(out_opnd, Qnil.into()); } Some(ivar_index) => { - if embed_test_result { - // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h - - // Load the variable - let offs = ROBJECT_OFFSET_AS_ARY + (ivar_index * SIZEOF_VALUE) as i32; - let ivar_opnd = Opnd::mem(64, recv, offs); - - // Push the ivar on the stack - let out_opnd = ctx.stack_push(Type::Unknown); - asm.mov(out_opnd, ivar_opnd); + let ivar_opnd = if receiver_t_object { + if comptime_receiver.embedded_p() { + // See ROBJECT_FIELDS() from include/ruby/internal/core/robject.h + + // Load the variable + let offs = ROBJECT_OFFSET_AS_ARY as i32 + (ivar_index * SIZEOF_VALUE) as i32; + Opnd::mem(64, recv, offs) + } else { + // Compile time value is *not* embedded. + + // Get a pointer to the extended table + let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_FIELDS as i32)); + + // Read the ivar from the extended table + Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32) + } } else { - // Compile time value is *not* embedded. 
- - // Get a pointer to the extended table - let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR)); + asm_comment!(asm, "call rb_ivar_get_at()"); - // Read the ivar from the extended table - let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32); + if assume_single_ractor_mode(jit, asm) { + asm.ccall(rb_ivar_get_at_no_ractor_check as *const u8, vec![recv, Opnd::UImm((ivar_index as u32).into())]) + } else { + // The function could raise RactorIsolationError. + jit_prepare_non_leaf_call(jit, asm); + asm.ccall(rb_ivar_get_at as *const u8, vec![recv, Opnd::UImm((ivar_index as u32).into()), Opnd::UImm(ivar_name)]) + } + }; - let out_opnd = ctx.stack_push(Type::Unknown); - asm.mov(out_opnd, ivar_opnd); - } + // Push the ivar on the stack + let out_opnd = asm.stack_push(Type::Unknown); + asm.mov(out_opnd, ivar_opnd); } } // Jump to next instruction. This allows guard chains to share the same successor. - jump_to_next_insn(jit, ctx, asm, ocb); - EndBlock + jump_to_next_insn(jit, asm); + Some(EndBlock) } fn gen_getinstancevariable( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Defer compilation so we can specialize on a runtime `self` - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, asm, ocb); - return EndBlock; + if !jit.at_compile_target() { + return jit.defer_compilation(asm); } - let ivar_name = jit_get_arg(jit, 0).as_u64(); + let ivar_name = jit.get_arg(0).as_u64(); - let comptime_val = jit_peek_at_self(jit); - - // Generate a side exit - let side_exit = get_side_exit(jit, ocb, ctx); + let comptime_val = jit.peek_at_self(); // Guard that the receiver has the same class as the one from compile time. let self_asm_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF); gen_get_ivar( jit, - ctx, asm, - ocb, GET_IVAR_MAX_DEPTH, comptime_val, ivar_name, self_asm_opnd, SelfOpnd, - side_exit, ) } @@ -2185,134 +3019,183 @@ fn gen_write_iv( if embed_test_result { // Find the IV offset - let offs = ROBJECT_OFFSET_AS_ARY + (ivar_index * SIZEOF_VALUE) as i32; + let offs = ROBJECT_OFFSET_AS_ARY as i32 + (ivar_index * SIZEOF_VALUE) as i32; let ivar_opnd = Opnd::mem(64, recv, offs); // Write the IV - asm.comment("write IV"); + asm_comment!(asm, "write IV"); asm.mov(ivar_opnd, set_value); } else { // Compile time value is *not* embedded. 
// Get a pointer to the extended table - let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR)); + let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_FIELDS as i32)); // Write the ivar in to the extended table let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32); - asm.comment("write IV"); + asm_comment!(asm, "write IV"); asm.mov(ivar_opnd, set_value); } } fn gen_setinstancevariable( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - let starting_context = ctx.clone(); // make a copy for use with jit_chain_guard - +) -> Option<CodegenStatus> { // Defer compilation so we can specialize on a runtime `self` - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, asm, ocb); - return EndBlock; + if !jit.at_compile_target() { + return jit.defer_compilation(asm); } - let ivar_name = jit_get_arg(jit, 0).as_u64(); - let comptime_receiver = jit_peek_at_self(jit); - let comptime_val_klass = comptime_receiver.class_of(); + let ivar_name = jit.get_arg(0).as_u64(); + let ic = jit.get_arg(1).as_ptr(); + let comptime_receiver = jit.peek_at_self(); + gen_set_ivar( + jit, + asm, + comptime_receiver, + ivar_name, + SelfOpnd, + Some(ic), + ) +} +/// Set an instance variable on setinstancevariable or attr_writer. +/// It switches the behavior based on what recv_opnd is given. +/// * SelfOpnd: setinstancevariable, which doesn't push a result onto the stack. +/// * StackOpnd: attr_writer, which pushes a result onto the stack. +fn gen_set_ivar( + jit: &mut JITState, + asm: &mut Assembler, + comptime_receiver: VALUE, + ivar_name: ID, + recv_opnd: YARVOpnd, + ic: Option<*const iseq_inline_iv_cache_entry>, +) -> Option<CodegenStatus> { // If the comptime receiver is frozen, writing an IV will raise an exception // and we don't want to JIT code to deal with that situation. 
- // If the object has a too complex shape, we will also exit - if comptime_receiver.is_frozen() || comptime_receiver.shape_too_complex() { - return CantCompile; + if comptime_receiver.is_frozen() { + gen_counter_incr(jit, asm, Counter::setivar_frozen); + return None; } - let (_, stack_type) = ctx.get_opnd_mapping(StackOpnd(0)); - - // Check if the comptime class uses a custom allocator - let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) }; - let uses_custom_allocator = match custom_allocator { - Some(alloc_fun) => { - let allocate_instance = rb_class_allocate_instance as *const u8; - alloc_fun as *const u8 != allocate_instance - } - None => false, - }; + let stack_type = asm.ctx.get_opnd_type(StackOpnd(0)); // Check if the comptime receiver is a T_OBJECT let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) }; + // Use a general C call at the last chain to avoid exits on megamorphic shapes + let megamorphic = asm.ctx.get_chain_depth() >= SET_IVAR_MAX_DEPTH; + if megamorphic { + gen_counter_incr(jit, asm, Counter::num_setivar_megamorphic); + } - // If the receiver isn't a T_OBJECT, or uses a custom allocator, - // then just write out the IV write as a function call - if !receiver_t_object || uses_custom_allocator { - asm.comment("call rb_vm_setinstancevariable()"); + // Get the iv index + let shape_too_complex = comptime_receiver.shape_too_complex(); + let ivar_index = if !comptime_receiver.special_const_p() && !shape_too_complex { + let shape_id = comptime_receiver.shape_id_of(); + let mut ivar_index: u16 = 0; + if unsafe { rb_shape_get_iv_index(shape_id, ivar_name, &mut ivar_index) } { + Some(ivar_index as usize) + } else { + None + } + } else { + None + }; - let ic = jit_get_arg(jit, 1).as_u64(); // type IVC + // The current shape doesn't contain this iv, so we need to transition to another shape. + let mut new_shape_too_complex = false; + let new_shape = if !shape_too_complex && receiver_t_object && ivar_index.is_none() { + let current_shape_id = comptime_receiver.shape_id_of(); + // We don't need to check for imemo_fields here because we're definitely looking at a T_OBJECT. + let klass = unsafe { rb_obj_class(comptime_receiver) }; + let next_shape_id = unsafe { rb_shape_transition_add_ivar_no_warnings(klass, current_shape_id, ivar_name) }; + + // If the VM ran out of shapes, or this class generated too many leaf shapes, + // it may be de-optimized into OBJ_TOO_COMPLEX_SHAPE (hash-table). + new_shape_too_complex = unsafe { rb_jit_shape_too_complex_p(next_shape_id) }; + if new_shape_too_complex { + Some((next_shape_id, None, 0_usize)) + } else { + let current_capacity = unsafe { rb_yjit_shape_capacity(current_shape_id) }; + let next_capacity = unsafe { rb_yjit_shape_capacity(next_shape_id) }; - // The function could raise exceptions. - // Note that this modifies REG_SP, which is why we do it first - jit_prepare_routine_call(jit, ctx, asm); + // If the new shape has a different capacity, or is TOO_COMPLEX, we'll have to + // reallocate it.
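+ // (The reallocation itself is emitted below as a call to rb_ensure_iv_list_size().)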
+ let needs_extension = next_capacity != current_capacity; - // Get the operands from the stack - let val_opnd = ctx.stack_pop(1); + // We can write to the object, but we need to transition the shape + let ivar_index = unsafe { rb_yjit_shape_index(next_shape_id) } as usize; - // Call rb_vm_setinstancevariable(iseq, obj, id, val, ic); - asm.ccall( - rb_vm_setinstancevariable as *const u8, - vec![ - Opnd::const_ptr(jit.iseq as *const u8), - Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF), - ivar_name.into(), - val_opnd, - Opnd::const_ptr(ic as *const u8), - ] - ); - } else { - // Get the iv index - let ivar_index = unsafe { - let shape_id = comptime_receiver.shape_id_of(); - let shape = rb_shape_get_shape_by_id(shape_id); - let mut ivar_index: u32 = 0; - if rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) { - Some(ivar_index as usize) + let needs_extension = if needs_extension { + Some((current_capacity, next_capacity)) } else { None - } - }; + }; + Some((next_shape_id, needs_extension, ivar_index)) + } + } else { + None + }; - // Get the receiver - let mut recv = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); + // If the receiver isn't a T_OBJECT, then just write out the IV write as a function call. + // too-complex shapes can't use index access, so we use rb_ivar_get for them too. + if !receiver_t_object || shape_too_complex || new_shape_too_complex || megamorphic { + // The function could raise FrozenError. + // Note that this modifies REG_SP, which is why we do it first + jit_prepare_non_leaf_call(jit, asm); - let recv_opnd = SelfOpnd; - let recv_type = ctx.get_opnd_type(recv_opnd); + // Get the operands from the stack + let val_opnd = asm.stack_opnd(0); - // Generate a side exit - let side_exit = get_side_exit(jit, ocb, ctx); + if let StackOpnd(index) = recv_opnd { // attr_writer + let recv = asm.stack_opnd(index as i32); + asm_comment!(asm, "call rb_vm_set_ivar_id()"); + asm.ccall( + rb_vm_set_ivar_id as *const u8, + vec![ + recv, + Opnd::UImm(ivar_name), + val_opnd, + ], + ); + } else { // setinstancevariable + asm_comment!(asm, "call rb_vm_setinstancevariable()"); + asm.ccall( + rb_vm_setinstancevariable as *const u8, + vec![ + VALUE(jit.iseq as usize).into(), + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF), + ivar_name.into(), + val_opnd, + Opnd::const_ptr(ic.unwrap() as *const u8), + ], + ); + } + } else { + // Get the receiver + let mut recv = asm.load(if let StackOpnd(index) = recv_opnd { + asm.stack_opnd(index as i32) + } else { + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF) + }); // Upgrade type - if !recv_type.is_heap() { // Must be a heap type - ctx.upgrade_opnd_type(recv_opnd, Type::UnknownHeap); - guard_object_is_heap(asm, recv, side_exit); - } + guard_object_is_heap(asm, recv, recv_opnd, Counter::setivar_not_heap); - let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) }; + let expected_shape = unsafe { rb_obj_shape_id(comptime_receiver) }; let shape_id_offset = unsafe { rb_shape_id_offset() }; let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset); - asm.comment("guard shape"); + asm_comment!(asm, "guard shape"); asm.cmp(shape_opnd, Opnd::UImm(expected_shape as u64)); - let megamorphic_side_exit = counted_exit!(ocb, side_exit, setivar_megamorphic); jit_chain_guard( JCC_JNE, jit, - &starting_context, asm, - ocb, SET_IVAR_MAX_DEPTH, - megamorphic_side_exit, + Counter::setivar_megamorphic, ); let write_val; @@ -2321,42 +3204,15 @@ fn gen_setinstancevariable( // If we don't have an instance variable index, then we need to // transition out 
of the current shape. None => { - let shape = comptime_receiver.shape_of(); - - let current_capacity = unsafe { (*shape).capacity }; - let new_capacity = current_capacity * 2; - - // If the object doesn't have the capacity to store the IV, - // then we'll need to allocate it. - let needs_extension = unsafe { (*shape).next_iv_index >= current_capacity }; - - // We can write to the object, but we need to transition the shape - let ivar_index = unsafe { (*shape).next_iv_index } as usize; - - let capa_shape = if needs_extension { - // We need to add an extended table to the object - // First, create an outgoing transition that increases the - // capacity - Some(unsafe { rb_shape_transition_shape_capa(shape, new_capacity) }) - } else { - None - }; - - let dest_shape = if let Some(capa_shape) = capa_shape { - unsafe { rb_shape_get_next(capa_shape, comptime_receiver, ivar_name) } - } else { - unsafe { rb_shape_get_next(shape, comptime_receiver, ivar_name) } - }; - - let new_shape_id = unsafe { rb_shape_id(dest_shape) }; - - if new_shape_id == OBJ_TOO_COMPLEX_SHAPE_ID { - return CantCompile; - } - - if needs_extension { + let (new_shape_id, needs_extension, ivar_index) = new_shape.unwrap(); + if let Some((current_capacity, new_capacity)) = needs_extension { // Generate the C call so that runtime code will increase // the capacity and set the buffer. + asm_comment!(asm, "call rb_ensure_iv_list_size"); + + // It allocates so can trigger GC, which takes the VM lock + // so could yield to a different ractor. + jit_prepare_call_with_gc(jit, asm); asm.ccall(rb_ensure_iv_list_size as *const u8, vec![ recv, @@ -2366,13 +3222,17 @@ fn gen_setinstancevariable( ); // Load the receiver again after the function call - recv = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)) + recv = asm.load(if let StackOpnd(index) = recv_opnd { + asm.stack_opnd(index as i32) + } else { + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF) + }); } - write_val = ctx.stack_pop(1); - gen_write_iv(asm, comptime_receiver, recv, ivar_index, write_val, needs_extension); + write_val = asm.stack_opnd(0); + gen_write_iv(asm, comptime_receiver, recv, ivar_index, write_val, needs_extension.is_some()); - asm.comment("write shape"); + asm_comment!(asm, "write shape"); let shape_id_offset = unsafe { rb_shape_id_offset() }; let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset); @@ -2387,7 +3247,7 @@ fn gen_setinstancevariable( // the iv index by searching up the shape tree. If we've // made the transition already, then there's no reason to // update the shape on the object. Just set the IV. - write_val = ctx.stack_pop(1); + write_val = asm.stack_opnd(0); gen_write_iv(asm, comptime_receiver, recv, ivar_index, write_val, false); }, } @@ -2395,6 +3255,7 @@ fn gen_setinstancevariable( // If we know the stack value is an immediate, there's no need to // generate WB code. 
if !stack_type.is_imm() { + asm.spill_regs(); // for ccall (unconditionally spill them for RegMappings consistency) let skip_wb = asm.new_label("skip_wb"); // If the value we're writing is an immediate, we don't need to WB asm.test(write_val, (RUBY_IMMEDIATE_MASK as u64).into()); @@ -2404,7 +3265,7 @@ fn gen_setinstancevariable( asm.cmp(write_val, Qnil.into()); asm.jbe(skip_wb); - asm.comment("write barrier"); + asm_comment!(asm, "write barrier"); asm.ccall( rb_gc_writebarrier as *const u8, vec![ @@ -2416,70 +3277,160 @@ fn gen_setinstancevariable( asm.write_label(skip_wb); } } + let write_val = asm.stack_pop(1); // Keep write_val on stack during ccall for GC + + // If it's attr_writer, i.e. recv_opnd is StackOpnd, we need to pop + // the receiver and push the written value onto the stack. + if let StackOpnd(_) = recv_opnd { + asm.stack_pop(1); // Pop receiver + + let out_opnd = asm.stack_push(Type::Unknown); // Push a return value + asm.mov(out_opnd, write_val); + } - KeepCompiling + Some(KeepCompiling) } fn gen_defined( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let op_type = jit_get_arg(jit, 0).as_u64(); - let obj = jit_get_arg(jit, 1); - let pushval = jit_get_arg(jit, 2); +) -> Option<CodegenStatus> { + let op_type = jit.get_arg(0).as_u64(); + let obj = jit.get_arg(1); + let pushval = jit.get_arg(2); - // Save the PC and SP because the callee may allocate - // Note that this modifies REG_SP, which is why we do it first - jit_prepare_routine_call(jit, ctx, asm); + match op_type as u32 { + DEFINED_YIELD => { + asm.stack_pop(1); // v operand is not used + let out_opnd = asm.stack_push(Type::Unknown); // nil or "yield" - // Get the operands from the stack - let v_opnd = ctx.stack_pop(1); + gen_block_given(jit, asm, out_opnd, pushval.into(), Qnil.into()); + } + _ => { + // Save the PC and SP because the callee may allocate or call #respond_to? 
+ // Note that this modifies REG_SP, which is why we do it first + jit_prepare_non_leaf_call(jit, asm); - // Call vm_defined(ec, reg_cfp, op_type, obj, v) - let def_result = asm.ccall(rb_vm_defined as *const u8, vec![EC, CFP, op_type.into(), obj.into(), v_opnd]); + // Get the operands from the stack + let v_opnd = asm.stack_opnd(0); - // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) { - // val = pushval; - // } - asm.test(def_result, Opnd::UImm(255)); - let out_value = asm.csel_nz(pushval.into(), Qnil.into()); + // Call vm_defined(ec, reg_cfp, op_type, obj, v) + let def_result = asm.ccall(rb_vm_defined as *const u8, vec![EC, CFP, op_type.into(), obj.into(), v_opnd]); + asm.stack_pop(1); // Keep it on stack during ccall for GC - // Push the return value onto the stack - let out_type = if pushval.special_const_p() { - Type::UnknownImm - } else { - Type::Unknown + // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) { + // val = pushval; + // } + asm.test(def_result, Opnd::UImm(255)); + let out_value = asm.csel_nz(pushval.into(), Qnil.into()); + + // Push the return value onto the stack + let out_type = if pushval.special_const_p() { + Type::UnknownImm + } else { + Type::Unknown + }; + let stack_ret = asm.stack_push(out_type); + asm.mov(stack_ret, out_value); + } + } + + Some(KeepCompiling) +} + +fn gen_definedivar( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + // Defer compilation so we can specialize based on a runtime receiver + if !jit.at_compile_target() { + return jit.defer_compilation(asm); + } + + let ivar_name = jit.get_arg(0).as_u64(); + // Value that will be pushed on the stack if the ivar is defined. In practice this is always the + // string "instance-variable". If the ivar is not defined, nil will be pushed instead.
+ let pushval = jit.get_arg(2); + + // Get the receiver + let recv = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); + + // Specialize base on compile time values + let comptime_receiver = jit.peek_at_self(); + + if comptime_receiver.special_const_p() || comptime_receiver.shape_too_complex() || asm.ctx.get_chain_depth() >= GET_IVAR_MAX_DEPTH { + // Fall back to calling rb_ivar_defined + + // Save the PC and SP because the callee may allocate + // Note that this modifies REG_SP, which is why we do it first + jit_prepare_call_with_gc(jit, asm); + + // Call rb_ivar_defined(recv, ivar_name) + let def_result = asm.ccall(rb_ivar_defined as *const u8, vec![recv, ivar_name.into()]); + + // if (rb_ivar_defined(recv, ivar_name)) { + // val = pushval; + // } + asm.test(def_result, Opnd::UImm(255)); + let out_value = asm.csel_nz(pushval.into(), Qnil.into()); + + // Push the return value onto the stack + let out_type = if pushval.special_const_p() { Type::UnknownImm } else { Type::Unknown }; + let stack_ret = asm.stack_push(out_type); + asm.mov(stack_ret, out_value); + + return Some(KeepCompiling) + } + + let shape_id = comptime_receiver.shape_id_of(); + let ivar_exists = unsafe { + let mut ivar_index: u16 = 0; + rb_shape_get_iv_index(shape_id, ivar_name, &mut ivar_index) }; - let stack_ret = ctx.stack_push(out_type); - asm.mov(stack_ret, out_value); - KeepCompiling + // Guard heap object (recv_opnd must be used before stack_pop) + guard_object_is_heap(asm, recv, SelfOpnd, Counter::definedivar_not_heap); + + let shape_id_offset = unsafe { rb_shape_id_offset() }; + let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset); + + asm_comment!(asm, "guard shape"); + asm.cmp(shape_opnd, Opnd::UImm(shape_id as u64)); + jit_chain_guard( + JCC_JNE, + jit, + asm, + GET_IVAR_MAX_DEPTH, + Counter::definedivar_megamorphic, + ); + + let result = if ivar_exists { pushval } else { Qnil }; + jit_putobject(asm, result); + + // Jump to next instruction. This allows guard chains to share the same successor. 
+ return jump_to_next_insn(jit, asm); } fn gen_checktype( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let type_val = jit_get_arg(jit, 0).as_u32(); +) -> Option<CodegenStatus> { + let type_val = jit.get_arg(0).as_u32(); // Only three types are emitted by compile.c at the moment if let RUBY_T_STRING | RUBY_T_ARRAY | RUBY_T_HASH = type_val { - let val_type = ctx.get_opnd_type(StackOpnd(0)); - let val = asm.load(ctx.stack_pop(1)); + let val_type = asm.ctx.get_opnd_type(StackOpnd(0)); + let val = asm.stack_pop(1); // Check if we know from type information match val_type.known_value_type() { Some(value_type) => { if value_type == type_val { - jit_putobject(jit, ctx, asm, Qtrue); - return KeepCompiling; + jit_putobject(asm, Qtrue); + return Some(KeepCompiling); } else { - jit_putobject(jit, ctx, asm, Qfalse); - return KeepCompiling; + jit_putobject(asm, Qfalse); + return Some(KeepCompiling); } }, _ => (), @@ -2487,6 +3438,7 @@ fn gen_checktype( let ret = asm.new_label("ret"); + let val = asm.load(val); if !val_type.is_heap() { // if (SPECIAL_CONST_P(val)) { // Return Qfalse via REG1 if not on heap @@ -2504,27 +3456,25 @@ fn gen_checktype( let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into()); asm.write_label(ret); - let stack_ret = ctx.stack_push(Type::UnknownImm); + let stack_ret = asm.stack_push(Type::UnknownImm); asm.mov(stack_ret, ret_opnd); - KeepCompiling + Some(KeepCompiling) } else { - CantCompile + None } } fn gen_concatstrings( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let n = jit_get_arg(jit, 0).as_usize(); +) -> Option<CodegenStatus> { + let n = jit.get_arg(0).as_usize(); - // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, asm); + // rb_str_concat_literals may raise Encoding::CompatibilityError + jit_prepare_non_leaf_call(jit, asm); - let values_ptr = asm.lea(ctx.sp_opnd(-((SIZEOF_VALUE as isize) * n as isize))); + let values_ptr = asm.lea(asm.ctx.sp_opnd(-(n as i32))); // call rb_str_concat_literals(size_t n, const VALUE *strings); let return_value = asm.ccall( @@ -2532,39 +3482,42 @@ fn gen_concatstrings( vec![n.into(), values_ptr] ); - ctx.stack_pop(n); - let stack_ret = ctx.stack_push(Type::CString); + asm.stack_pop(n); + let stack_ret = asm.stack_push(Type::TString); asm.mov(stack_ret, return_value); - KeepCompiling + Some(KeepCompiling) } fn guard_two_fixnums( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, - side_exit: Target ) { + let counter = Counter::guard_send_not_fixnums; + + // Get stack operands without popping them + let arg1 = asm.stack_opnd(0); + let arg0 = asm.stack_opnd(1); + // Get the stack operand types - let arg1_type = ctx.get_opnd_type(StackOpnd(0)); - let arg0_type = ctx.get_opnd_type(StackOpnd(1)); + let arg1_type = asm.ctx.get_opnd_type(arg1.into()); + let arg0_type = asm.ctx.get_opnd_type(arg0.into()); if arg0_type.is_heap() || arg1_type.is_heap() { - asm.comment("arg is heap object"); - asm.jmp(side_exit); + asm_comment!(asm, "arg is heap object"); + asm.jmp(Target::side_exit(counter)); return; } if arg0_type != Type::Fixnum && arg0_type.is_specific() { - asm.comment("arg0 not fixnum"); - asm.jmp(side_exit); + asm_comment!(asm, "arg0 not fixnum"); + asm.jmp(Target::side_exit(counter)); return; } if arg1_type != Type::Fixnum && arg1_type.is_specific() { - asm.comment("arg1 not fixnum"); - asm.jmp(side_exit); + asm_comment!(asm, "arg1 not fixnum"); + 
asm.jmp(Target::side_exit(counter)); return; } @@ -2573,43 +3526,35 @@ fn guard_two_fixnums( assert!(arg0_type == Type::Fixnum || arg0_type.is_unknown()); assert!(arg1_type == Type::Fixnum || arg1_type.is_unknown()); - // Get stack operands without popping them - let arg1 = ctx.stack_opnd(0); - let arg0 = ctx.stack_opnd(1); - // If not fixnums at run-time, fall back if arg0_type != Type::Fixnum { - asm.comment("guard arg0 fixnum"); + asm_comment!(asm, "guard arg0 fixnum"); asm.test(arg0, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); jit_chain_guard( JCC_JZ, jit, - &ctx, asm, - ocb, SEND_MAX_DEPTH, - side_exit, + counter, ); } if arg1_type != Type::Fixnum { - asm.comment("guard arg1 fixnum"); + asm_comment!(asm, "guard arg1 fixnum"); asm.test(arg1, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); jit_chain_guard( JCC_JZ, jit, - &ctx, asm, - ocb, SEND_MAX_DEPTH, - side_exit, + counter, ); } // Set stack types in context - ctx.upgrade_opnd_type(StackOpnd(0), Type::Fixnum); - ctx.upgrade_opnd_type(StackOpnd(1), Type::Fixnum); + asm.ctx.upgrade_opnd_type(arg1.into(), Type::Fixnum); + asm.ctx.upgrade_opnd_type(arg0.into(), Type::Fixnum); } // Conditional move operation used by comparison operators @@ -2617,753 +3562,732 @@ type CmovFn = fn(cb: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> Opnd; fn gen_fixnum_cmp( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, cmov_op: CmovFn, -) -> CodegenStatus { - // Defer compilation so we can specialize base on a runtime receiver - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, asm, ocb); - return EndBlock; - } - - let comptime_a = jit_peek_at_stack(jit, ctx, 1); - let comptime_b = jit_peek_at_stack(jit, ctx, 0); - - if comptime_a.fixnum_p() && comptime_b.fixnum_p() { - // Create a side-exit to fall back to the interpreter - // Note: we generate the side-exit before popping operands from the stack - let side_exit = get_side_exit(jit, ocb, ctx); + bop: ruby_basic_operators, +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + // Defer compilation so we can specialize based on a runtime receiver + return jit.defer_compilation(asm); + } + }; - if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_LT) { - return CantCompile; + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, bop) { + return None; } // Check that both operands are fixnums - guard_two_fixnums(jit, ctx, asm, ocb, side_exit); + guard_two_fixnums(jit, asm); // Get the operands from the stack - let arg1 = ctx.stack_pop(1); - let arg0 = ctx.stack_pop(1); + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); // Compare the arguments asm.cmp(arg0, arg1); let bool_opnd = cmov_op(asm, Qtrue.into(), Qfalse.into()); // Push the output on the stack - let dst = ctx.stack_push(Type::Unknown); + let dst = asm.stack_push(Type::UnknownImm); asm.mov(dst, bool_opnd); - KeepCompiling + Some(KeepCompiling) } else { - gen_opt_send_without_block(jit, ctx, asm, ocb) + gen_opt_send_without_block(jit, asm) } } fn gen_opt_lt( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - gen_fixnum_cmp(jit, ctx, asm, ocb, Assembler::csel_l) +) -> Option<CodegenStatus> { + gen_fixnum_cmp(jit, asm, Assembler::csel_l, BOP_LT) } fn gen_opt_le( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - gen_fixnum_cmp(jit, ctx, asm, ocb, 
Assembler::csel_le) +) -> Option<CodegenStatus> { + gen_fixnum_cmp(jit, asm, Assembler::csel_le, BOP_LE) } fn gen_opt_ge( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - gen_fixnum_cmp(jit, ctx, asm, ocb, Assembler::csel_ge) +) -> Option<CodegenStatus> { + gen_fixnum_cmp(jit, asm, Assembler::csel_ge, BOP_GE) } fn gen_opt_gt( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - gen_fixnum_cmp(jit, ctx, asm, ocb, Assembler::csel_g) +) -> Option<CodegenStatus> { + gen_fixnum_cmp(jit, asm, Assembler::csel_g, BOP_GT) } // Implements specialized equality for either two fixnum or two strings -// Returns true if code was generated, otherwise false +// Returns None if enough type information isn't available, Some(true) +// if code was generated, otherwise Some(false). fn gen_equality_specialized( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, - side_exit: Target, -) -> bool { - let comptime_a = jit_peek_at_stack(jit, ctx, 1); - let comptime_b = jit_peek_at_stack(jit, ctx, 0); + gen_eq: bool, +) -> Option<bool> { + let a_opnd = asm.stack_opnd(1); + let b_opnd = asm.stack_opnd(0); - let a_opnd = ctx.stack_opnd(1); - let b_opnd = ctx.stack_opnd(0); + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => return None, + }; - if comptime_a.fixnum_p() && comptime_b.fixnum_p() { - if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_EQ) { + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, BOP_EQ) { // if overridden, emit the generic version - return false; + return Some(false); } - guard_two_fixnums(jit, ctx, asm, ocb, side_exit); + guard_two_fixnums(jit, asm); asm.cmp(a_opnd, b_opnd); - - let val = asm.csel_ne(Qfalse.into(), Qtrue.into()); + let val = if gen_eq { + asm.csel_e(Qtrue.into(), Qfalse.into()) + } else { + asm.csel_ne(Qtrue.into(), Qfalse.into()) + }; // Push the output on the stack - ctx.stack_pop(2); - let dst = ctx.stack_push(Type::UnknownImm); + asm.stack_pop(2); + let dst = asm.stack_push(Type::UnknownImm); asm.mov(dst, val); - true + return Some(true); } - else if unsafe { comptime_a.class_of() == rb_cString && comptime_b.class_of() == rb_cString } - { - if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_EQ) { + + if !jit.at_compile_target() { + return None; + } + let comptime_a = jit.peek_at_stack(&asm.ctx, 1); + let comptime_b = jit.peek_at_stack(&asm.ctx, 0); + + if unsafe { comptime_a.class_of() == rb_cString && comptime_b.class_of() == rb_cString } { + if !assume_bop_not_redefined(jit, asm, STRING_REDEFINED_OP_FLAG, BOP_EQ) { // if overridden, emit the generic version - return false; + return Some(false); } // Guard that a is a String jit_guard_known_klass( jit, - ctx, asm, - ocb, - unsafe { rb_cString }, a_opnd, - StackOpnd(1), + a_opnd.into(), comptime_a, SEND_MAX_DEPTH, - side_exit, + Counter::guard_send_not_string, ); let equal = asm.new_label("equal"); let ret = asm.new_label("ret"); + // Spill for ccall. For safety, unconditionally spill temps before branching. 
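    // (Likely rationale: the fast path that takes the `equal` branch skips the
    // ccall below, and a C call needs stack temporaries in memory rather than
    // in registers, so spilling before the branch keeps both paths, which
    // rejoin at `ret`, in a consistent register state.)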
+ asm.spill_regs(); + // If they are equal by identity, return true asm.cmp(a_opnd, b_opnd); asm.je(equal); // Otherwise guard that b is a T_STRING (from type info) or String (from runtime guard) - let btype = ctx.get_opnd_type(StackOpnd(0)); + let btype = asm.ctx.get_opnd_type(b_opnd.into()); if btype.known_value_type() != Some(RUBY_T_STRING) { // Note: any T_STRING is valid here, but we check for a ::String for simplicity // To pass a mutable static variable (rb_cString) requires an unsafe block jit_guard_known_klass( jit, - ctx, asm, - ocb, - unsafe { rb_cString }, b_opnd, - StackOpnd(0), + b_opnd.into(), comptime_b, SEND_MAX_DEPTH, - side_exit, + Counter::guard_send_not_string, ); } // Call rb_str_eql_internal(a, b) - let val = asm.ccall(rb_str_eql_internal as *const u8, vec![a_opnd, b_opnd]); + let val = asm.ccall( + if gen_eq { rb_str_eql_internal } else { rb_str_neq_internal } as *const u8, + vec![a_opnd, b_opnd], + ); // Push the output on the stack - ctx.stack_pop(2); - let dst = ctx.stack_push(Type::UnknownImm); + asm.stack_pop(2); + let dst = asm.stack_push(Type::UnknownImm); asm.mov(dst, val); asm.jmp(ret); asm.write_label(equal); - asm.mov(dst, Qtrue.into()); + asm.mov(dst, if gen_eq { Qtrue } else { Qfalse }.into()); asm.write_label(ret); - true + Some(true) } else { - false + Some(false) } } fn gen_opt_eq( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - // Defer compilation so we can specialize base on a runtime receiver - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, asm, ocb); - return EndBlock; - } - - // Create a side-exit to fall back to the interpreter - let side_exit = get_side_exit(jit, ocb, ctx); +) -> Option<CodegenStatus> { + let specialized = match gen_equality_specialized(jit, asm, true) { + Some(specialized) => specialized, + None => { + // Defer compilation so we can specialize base on a runtime receiver + return jit.defer_compilation(asm); + } + }; - if gen_equality_specialized(jit, ctx, asm, ocb, side_exit) { - jump_to_next_insn(jit, ctx, asm, ocb); - EndBlock + if specialized { + jump_to_next_insn(jit, asm) } else { - gen_opt_send_without_block(jit, ctx, asm, ocb) + gen_opt_send_without_block(jit, asm) } } fn gen_opt_neq( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // opt_neq is passed two rb_call_data as arguments: // first for ==, second for != - let cd = jit_get_arg(jit, 1).as_ptr(); - return gen_send_general(jit, ctx, asm, ocb, cd, None); + let cd = jit.get_arg(1).as_ptr(); + perf_call! 
{ gen_send_general(jit, asm, cd, None) } } fn gen_opt_aref( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - let cd: *const rb_call_data = jit_get_arg(jit, 0).as_ptr(); +) -> Option<CodegenStatus> { + let cd: *const rb_call_data = jit.get_arg(0).as_ptr(); let argc = unsafe { vm_ci_argc((*cd).ci) }; // Only JIT one arg calls like `ary[6]` if argc != 1 { - gen_counter_incr!(asm, oaref_argc_not_one); - return CantCompile; + gen_counter_incr(jit, asm, Counter::opt_aref_argc_not_one); + return None; } // Defer compilation so we can specialize base on a runtime receiver - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, asm, ocb); - return EndBlock; + if !jit.at_compile_target() { + return jit.defer_compilation(asm); } // Specialize base on compile time values - let comptime_idx = jit_peek_at_stack(jit, ctx, 0); - let comptime_recv = jit_peek_at_stack(jit, ctx, 1); - - // Create a side-exit to fall back to the interpreter - let side_exit = get_side_exit(jit, ocb, ctx); + let comptime_idx = jit.peek_at_stack(&asm.ctx, 0); + let comptime_recv = jit.peek_at_stack(&asm.ctx, 1); if comptime_recv.class_of() == unsafe { rb_cArray } && comptime_idx.fixnum_p() { - if !assume_bop_not_redefined(jit, ocb, ARRAY_REDEFINED_OP_FLAG, BOP_AREF) { - return CantCompile; + if !assume_bop_not_redefined(jit, asm, ARRAY_REDEFINED_OP_FLAG, BOP_AREF) { + return None; } // Get the stack operands - let idx_opnd = ctx.stack_opnd(0); - let recv_opnd = ctx.stack_opnd(1); + let idx_opnd = asm.stack_opnd(0); + let recv_opnd = asm.stack_opnd(1); // Guard that the receiver is an ::Array // BOP_AREF check above is only good for ::Array. jit_guard_known_klass( jit, - ctx, asm, - ocb, - unsafe { rb_cArray }, recv_opnd, - StackOpnd(1), + recv_opnd.into(), comptime_recv, OPT_AREF_MAX_CHAIN_DEPTH, - side_exit, + Counter::opt_aref_not_array, ); // Bail if idx is not a FIXNUM let idx_reg = asm.load(idx_opnd); asm.test(idx_reg, (RUBY_FIXNUM_FLAG as u64).into()); - asm.jz(counted_exit!(ocb, side_exit, oaref_arg_not_fixnum)); + asm.jz(Target::side_exit(Counter::opt_aref_arg_not_fixnum)); // Call VALUE rb_ary_entry_internal(VALUE ary, long offset). // It never raises or allocates, so we don't need to write to cfp->pc. { + // Pop the argument and the receiver + asm.stack_pop(2); + let idx_reg = asm.rshift(idx_reg, Opnd::UImm(1)); // Convert fixnum to int let val = asm.ccall(rb_ary_entry_internal as *const u8, vec![recv_opnd, idx_reg]); - // Pop the argument and the receiver - ctx.stack_pop(2); - // Push the return value onto the stack - let stack_ret = ctx.stack_push(Type::Unknown); + let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, val); } // Jump to next instruction. This allows guard chains to share the same successor. - jump_to_next_insn(jit, ctx, asm, ocb); - return EndBlock; + return jump_to_next_insn(jit, asm); } else if comptime_recv.class_of() == unsafe { rb_cHash } { - if !assume_bop_not_redefined(jit, ocb, HASH_REDEFINED_OP_FLAG, BOP_AREF) { - return CantCompile; + if !assume_bop_not_redefined(jit, asm, HASH_REDEFINED_OP_FLAG, BOP_AREF) { + return None; } - let recv_opnd = ctx.stack_opnd(1); + let recv_opnd = asm.stack_opnd(1); // Guard that the receiver is a hash jit_guard_known_klass( jit, - ctx, asm, - ocb, - unsafe { rb_cHash }, recv_opnd, - StackOpnd(1), + recv_opnd.into(), comptime_recv, OPT_AREF_MAX_CHAIN_DEPTH, - side_exit, + Counter::opt_aref_not_hash, ); // Prepare to call rb_hash_aref(). It might call #hash on the key. 
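    // #hash (or #eql?) on the key can run arbitrary Ruby code, so the call may
    // allocate, raise, or inspect the stack; the PC and SP therefore have to be
    // written back to the control frame first, which is what the non-leaf call
    // preparation below does.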
- jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_non_leaf_call(jit, asm); // Call rb_hash_aref - let key_opnd = ctx.stack_opnd(0); - let recv_opnd = ctx.stack_opnd(1); + let key_opnd = asm.stack_opnd(0); + let recv_opnd = asm.stack_opnd(1); let val = asm.ccall(rb_hash_aref as *const u8, vec![recv_opnd, key_opnd]); // Pop the key and the receiver - ctx.stack_pop(2); + asm.stack_pop(2); // Push the return value onto the stack - let stack_ret = ctx.stack_push(Type::Unknown); + let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, val); // Jump to next instruction. This allows guard chains to share the same successor. - jump_to_next_insn(jit, ctx, asm, ocb); - EndBlock + jump_to_next_insn(jit, asm) } else { // General case. Call the [] method. - gen_opt_send_without_block(jit, ctx, asm, ocb) + gen_opt_send_without_block(jit, asm) } } fn gen_opt_aset( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Defer compilation so we can specialize on a runtime `self` - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, asm, ocb); - return EndBlock; + if !jit.at_compile_target() { + return jit.defer_compilation(asm); } - let comptime_recv = jit_peek_at_stack(jit, ctx, 2); - let comptime_key = jit_peek_at_stack(jit, ctx, 1); + let comptime_recv = jit.peek_at_stack(&asm.ctx, 2); + let comptime_key = jit.peek_at_stack(&asm.ctx, 1); // Get the operands from the stack - let recv = ctx.stack_opnd(2); - let key = ctx.stack_opnd(1); - let _val = ctx.stack_opnd(0); + let recv = asm.stack_opnd(2); + let key = asm.stack_opnd(1); + let _val = asm.stack_opnd(0); if comptime_recv.class_of() == unsafe { rb_cArray } && comptime_key.fixnum_p() { - let side_exit = get_side_exit(jit, ocb, ctx); - // Guard receiver is an Array jit_guard_known_klass( jit, - ctx, asm, - ocb, - unsafe { rb_cArray }, recv, - StackOpnd(2), + recv.into(), comptime_recv, SEND_MAX_DEPTH, - side_exit, + Counter::opt_aset_not_array, ); // Guard key is a fixnum jit_guard_known_klass( jit, - ctx, asm, - ocb, - unsafe { rb_cInteger }, key, - StackOpnd(1), + key.into(), comptime_key, SEND_MAX_DEPTH, - side_exit, + Counter::opt_aset_not_fixnum, ); // We might allocate or raise - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_non_leaf_call(jit, asm); // Call rb_ary_store - let recv = ctx.stack_opnd(2); - let key = asm.load(ctx.stack_opnd(1)); + let recv = asm.stack_opnd(2); + let key = asm.load(asm.stack_opnd(1)); let key = asm.rshift(key, Opnd::UImm(1)); // FIX2LONG(key) - let val = ctx.stack_opnd(0); + let val = asm.stack_opnd(0); asm.ccall(rb_ary_store as *const u8, vec![recv, key, val]); // rb_ary_store returns void // stored value should still be on stack - let val = asm.load(ctx.stack_opnd(0)); + let val = asm.load(asm.stack_opnd(0)); // Push the return value onto the stack - ctx.stack_pop(3); - let stack_ret = ctx.stack_push(Type::Unknown); + asm.stack_pop(3); + let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, val); - jump_to_next_insn(jit, ctx, asm, ocb); - return EndBlock; + return jump_to_next_insn(jit, asm) } else if comptime_recv.class_of() == unsafe { rb_cHash } { - let side_exit = get_side_exit(jit, ocb, ctx); - // Guard receiver is a Hash jit_guard_known_klass( jit, - ctx, asm, - ocb, - unsafe { rb_cHash }, recv, - StackOpnd(2), + recv.into(), comptime_recv, SEND_MAX_DEPTH, - side_exit, + Counter::opt_aset_not_hash, ); // We might allocate or raise - jit_prepare_routine_call(jit, ctx, asm); + 
jit_prepare_non_leaf_call(jit, asm); // Call rb_hash_aset - let recv = ctx.stack_opnd(2); - let key = ctx.stack_opnd(1); - let val = ctx.stack_opnd(0); + let recv = asm.stack_opnd(2); + let key = asm.stack_opnd(1); + let val = asm.stack_opnd(0); let ret = asm.ccall(rb_hash_aset as *const u8, vec![recv, key, val]); // Push the return value onto the stack - ctx.stack_pop(3); - let stack_ret = ctx.stack_push(Type::Unknown); + asm.stack_pop(3); + let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, ret); - jump_to_next_insn(jit, ctx, asm, ocb); - EndBlock + jump_to_next_insn(jit, asm) } else { - gen_opt_send_without_block(jit, ctx, asm, ocb) + gen_opt_send_without_block(jit, asm) } } fn gen_opt_and( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - // Defer compilation so we can specialize on a runtime `self` - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, asm, ocb); - return EndBlock; - } - - let comptime_a = jit_peek_at_stack(jit, ctx, 1); - let comptime_b = jit_peek_at_stack(jit, ctx, 0); - - if comptime_a.fixnum_p() && comptime_b.fixnum_p() { - // Create a side-exit to fall back to the interpreter - // Note: we generate the side-exit before popping operands from the stack - let side_exit = get_side_exit(jit, ocb, ctx); +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + // Defer compilation so we can specialize on a runtime `self` + return jit.defer_compilation(asm); + } + }; - if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_AND) { - return CantCompile; + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, BOP_AND) { + return None; } // Check that both operands are fixnums - guard_two_fixnums(jit, ctx, asm, ocb, side_exit); + guard_two_fixnums(jit, asm); // Get the operands and destination from the stack - let arg1 = ctx.stack_pop(1); - let arg0 = ctx.stack_pop(1); + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); // Do the bitwise and arg0 & arg1 let val = asm.and(arg0, arg1); // Push the output on the stack - let dst = ctx.stack_push(Type::Fixnum); - asm.store(dst, val); + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, val); - KeepCompiling + Some(KeepCompiling) } else { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, asm, ocb) + gen_opt_send_without_block(jit, asm) } } fn gen_opt_or( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - // Defer compilation so we can specialize on a runtime `self` - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, asm, ocb); - return EndBlock; - } - - let comptime_a = jit_peek_at_stack(jit, ctx, 1); - let comptime_b = jit_peek_at_stack(jit, ctx, 0); - - if comptime_a.fixnum_p() && comptime_b.fixnum_p() { - // Create a side-exit to fall back to the interpreter - // Note: we generate the side-exit before popping operands from the stack - let side_exit = get_side_exit(jit, ocb, ctx); +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + // Defer compilation so we can specialize on a runtime `self` + return jit.defer_compilation(asm); + } + }; - if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_OR) { - return CantCompile; + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, 
INTEGER_REDEFINED_OP_FLAG, BOP_OR) { + return None; } // Check that both operands are fixnums - guard_two_fixnums(jit, ctx, asm, ocb, side_exit); + guard_two_fixnums(jit, asm); // Get the operands and destination from the stack - let arg1 = ctx.stack_pop(1); - let arg0 = ctx.stack_pop(1); + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); // Do the bitwise or arg0 | arg1 let val = asm.or(arg0, arg1); // Push the output on the stack - let dst = ctx.stack_push(Type::Fixnum); - asm.store(dst, val); + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, val); - KeepCompiling + Some(KeepCompiling) } else { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, asm, ocb) + gen_opt_send_without_block(jit, asm) } } fn gen_opt_minus( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - // Defer compilation so we can specialize on a runtime `self` - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, asm, ocb); - return EndBlock; - } - - let comptime_a = jit_peek_at_stack(jit, ctx, 1); - let comptime_b = jit_peek_at_stack(jit, ctx, 0); - - if comptime_a.fixnum_p() && comptime_b.fixnum_p() { - // Create a side-exit to fall back to the interpreter - // Note: we generate the side-exit before popping operands from the stack - let side_exit = get_side_exit(jit, ocb, ctx); +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + // Defer compilation so we can specialize on a runtime `self` + return jit.defer_compilation(asm); + } + }; - if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS) { - return CantCompile; + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS) { + return None; } // Check that both operands are fixnums - guard_two_fixnums(jit, ctx, asm, ocb, side_exit); + guard_two_fixnums(jit, asm); // Get the operands and destination from the stack - let arg1 = ctx.stack_pop(1); - let arg0 = ctx.stack_pop(1); + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); // Subtract arg0 - arg1 and test for overflow let val_untag = asm.sub(arg0, arg1); - asm.jo(side_exit); + asm.jo(Target::side_exit(Counter::opt_minus_overflow)); let val = asm.add(val_untag, Opnd::Imm(1)); // Push the output on the stack - let dst = ctx.stack_push(Type::Fixnum); - asm.store(dst, val); + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, val); - KeepCompiling + Some(KeepCompiling) } else { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, asm, ocb) + gen_opt_send_without_block(jit, asm) } } fn gen_opt_mult( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, asm, ocb) +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + return jit.defer_compilation(asm); + } + }; + + // Fallback to a method call if it overflows + if two_fixnums && asm.ctx.get_chain_depth() == 0 { + if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, BOP_MULT) { + return None; + } + + // Check that both operands are fixnums + guard_two_fixnums(jit, asm); + + // Get the operands from the stack + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); + + // Do some bitwise gymnastics to handle tag bits + 
// x * y is translated to (x >> 1) * (y - 1) + 1 + let arg0_untag = asm.rshift(arg0, Opnd::UImm(1)); + let arg1_untag = asm.sub(arg1, Opnd::UImm(1)); + let out_val = asm.mul(arg0_untag, arg1_untag); + jit_chain_guard(JCC_JO_MUL, jit, asm, 1, Counter::opt_mult_overflow); + let out_val = asm.add(out_val, Opnd::UImm(1)); + + // Push the output on the stack + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, out_val); + + Some(KeepCompiling) + } else { + gen_opt_send_without_block(jit, asm) + } } fn gen_opt_div( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, asm, ocb) + gen_opt_send_without_block(jit, asm) } fn gen_opt_mod( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - // Defer compilation so we can specialize on a runtime `self` - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, asm, ocb); - return EndBlock; - } - - let comptime_a = jit_peek_at_stack(jit, ctx, 1); - let comptime_b = jit_peek_at_stack(jit, ctx, 0); - - if comptime_a.fixnum_p() && comptime_b.fixnum_p() { - // Create a side-exit to fall back to the interpreter - // Note: we generate the side-exit before popping operands from the stack - let side_exit = get_side_exit(jit, ocb, ctx); +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + // Defer compilation so we can specialize on a runtime `self` + return jit.defer_compilation(asm); + } + }; - if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_MOD) { - return CantCompile; + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, BOP_MOD) { + return None; } // Check that both operands are fixnums - guard_two_fixnums(jit, ctx, asm, ocb, side_exit); + guard_two_fixnums(jit, asm); // Get the operands and destination from the stack - let arg1 = ctx.stack_pop(1); - let arg0 = ctx.stack_pop(1); + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); // Check for arg0 % 0 asm.cmp(arg1, Opnd::Imm(VALUE::fixnum_from_usize(0).as_i64())); - asm.je(side_exit); + asm.je(Target::side_exit(Counter::opt_mod_zero)); // Call rb_fix_mod_fix(VALUE recv, VALUE obj) let ret = asm.ccall(rb_fix_mod_fix as *const u8, vec![arg0, arg1]); // Push the return value onto the stack - let stack_ret = ctx.stack_push(Type::Unknown); + // When the two arguments are fixnums, the modulo output is always a fixnum + let stack_ret = asm.stack_push(Type::Fixnum); asm.mov(stack_ret, ret); - KeepCompiling + Some(KeepCompiling) } else { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, asm, ocb) + gen_opt_send_without_block(jit, asm) } } fn gen_opt_ltlt( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, asm, ocb) + gen_opt_send_without_block(jit, asm) } fn gen_opt_nil_p( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, asm, ocb) + gen_opt_send_without_block(jit, asm) } fn gen_opt_empty_p( jit: &mut JITState, - ctx: &mut Context, asm: &mut 
Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, asm, ocb) + gen_opt_send_without_block(jit, asm) } fn gen_opt_succ( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, asm, ocb) + gen_opt_send_without_block(jit, asm) } - fn gen_opt_str_freeze( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_FREEZE) { - return CantCompile; +) -> Option<CodegenStatus> { + if !assume_bop_not_redefined(jit, asm, STRING_REDEFINED_OP_FLAG, BOP_FREEZE) { + return None; } - let str = jit_get_arg(jit, 0); + let str = jit.get_arg(0); // Push the return value onto the stack - let stack_ret = ctx.stack_push(Type::CString); + let stack_ret = asm.stack_push(Type::CString); asm.mov(stack_ret, str.into()); - KeepCompiling + Some(KeepCompiling) +} + +fn gen_opt_ary_freeze( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + if !assume_bop_not_redefined(jit, asm, ARRAY_REDEFINED_OP_FLAG, BOP_FREEZE) { + return None; + } + + let ary = jit.get_arg(0); + + // Push the return value onto the stack + let stack_ret = asm.stack_push(Type::CArray); + asm.mov(stack_ret, ary.into()); + + Some(KeepCompiling) +} + +fn gen_opt_hash_freeze( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + if !assume_bop_not_redefined(jit, asm, HASH_REDEFINED_OP_FLAG, BOP_FREEZE) { + return None; + } + + let hash = jit.get_arg(0); + + // Push the return value onto the stack + let stack_ret = asm.stack_push(Type::CHash); + asm.mov(stack_ret, hash.into()); + + Some(KeepCompiling) } fn gen_opt_str_uminus( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_UMINUS) { - return CantCompile; +) -> Option<CodegenStatus> { + if !assume_bop_not_redefined(jit, asm, STRING_REDEFINED_OP_FLAG, BOP_UMINUS) { + return None; } - let str = jit_get_arg(jit, 0); + let str = jit.get_arg(0); // Push the return value onto the stack - let stack_ret = ctx.stack_push(Type::CString); + let stack_ret = asm.stack_push(Type::CString); asm.mov(stack_ret, str.into()); - KeepCompiling + Some(KeepCompiling) } fn gen_opt_newarray_max( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let num = jit_get_arg(jit, 0).as_u32(); +) -> Option<CodegenStatus> { + let num = jit.get_arg(0).as_u32(); - // Save the PC and SP because we may allocate - jit_prepare_routine_call(jit, ctx, asm); + // Save the PC and SP because we may call #max + jit_prepare_non_leaf_call(jit, asm); extern "C" { fn rb_vm_opt_newarray_max(ec: EcPtr, num: u32, elts: *const VALUE) -> VALUE; } - let offset_magnitude = (SIZEOF_VALUE as u32) * num; - let values_opnd = ctx.sp_opnd(-(offset_magnitude as isize)); + let values_opnd = asm.ctx.sp_opnd(-(num as i32)); let values_ptr = asm.lea(values_opnd); let val_opnd = asm.ccall( @@ -3375,31 +4299,210 @@ fn gen_opt_newarray_max( ], ); - ctx.stack_pop(num.as_usize()); - let stack_ret = ctx.stack_push(Type::Unknown); + asm.stack_pop(num.as_usize()); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val_opnd); 
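// The fixnum fast paths above (gen_opt_minus, gen_opt_mult) operate directly on
// tagged VALUEs, where a fixnum n is encoded as (n << 1) | 1. The following is a
// minimal standalone sketch of that arithmetic; the helper names are illustrative
// only (not YJIT APIs), and it omits the overflow guards the JIT emits via
// asm.jo / JCC_JO_MUL.

fn fixnum_encode(n: i64) -> i64 { (n << 1) | 1 }
fn fixnum_decode(v: i64) -> i64 { v >> 1 }

// opt_minus: subtracting two tagged values cancels both tag bits, so add one back.
fn fixnum_sub(a: i64, b: i64) -> i64 { (a - b) + 1 }

// opt_mult: a >> 1 recovers x, b - 1 gives 2*y, the product is 2*x*y, and
// adding 1 re-tags it as the fixnum for x*y.
fn fixnum_mul(a: i64, b: i64) -> i64 { (a >> 1) * (b - 1) + 1 }

fn main() {
    let (a, b) = (fixnum_encode(6), fixnum_encode(7));
    assert_eq!(fixnum_decode(fixnum_sub(b, a)), 1);  // 7 - 6
    assert_eq!(fixnum_decode(fixnum_mul(a, b)), 42); // 6 * 7
}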
+ + Some(KeepCompiling) +} + +fn gen_opt_duparray_send( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + let method = jit.get_arg(1).as_u64(); + + if method == ID!(include_p) { + gen_opt_duparray_send_include_p(jit, asm) + } else { + None + } +} + +fn gen_opt_duparray_send_include_p( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + asm_comment!(asm, "opt_duparray_send include_p"); + + let ary = jit.get_arg(0); + let argc = jit.get_arg(2).as_usize(); + + // Save the PC and SP because we may call #include? + jit_prepare_non_leaf_call(jit, asm); + + extern "C" { + fn rb_vm_opt_duparray_include_p(ec: EcPtr, ary: VALUE, target: VALUE) -> VALUE; + } + + let target = asm.ctx.sp_opnd(-1); + + let val_opnd = asm.ccall( + rb_vm_opt_duparray_include_p as *const u8, + vec![ + EC, + ary.into(), + target, + ], + ); + + asm.stack_pop(argc); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val_opnd); + + Some(KeepCompiling) +} + +fn gen_opt_newarray_send( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + let method = jit.get_arg(1).as_u32(); + + if method == VM_OPT_NEWARRAY_SEND_MIN { + gen_opt_newarray_min(jit, asm) + } else if method == VM_OPT_NEWARRAY_SEND_MAX { + gen_opt_newarray_max(jit, asm) + } else if method == VM_OPT_NEWARRAY_SEND_HASH { + gen_opt_newarray_hash(jit, asm) + } else if method == VM_OPT_NEWARRAY_SEND_INCLUDE_P { + gen_opt_newarray_include_p(jit, asm) + } else if method == VM_OPT_NEWARRAY_SEND_PACK { + gen_opt_newarray_pack_buffer(jit, asm, 1, None) + } else if method == VM_OPT_NEWARRAY_SEND_PACK_BUFFER { + gen_opt_newarray_pack_buffer(jit, asm, 2, Some(1)) + } else { + None + } +} + +fn gen_opt_newarray_pack_buffer( + jit: &mut JITState, + asm: &mut Assembler, + fmt_offset: u32, + buffer: Option<u32>, +) -> Option<CodegenStatus> { + asm_comment!(asm, "opt_newarray_send pack"); + + let num = jit.get_arg(0).as_u32(); + + // Save the PC and SP because we may call #pack + jit_prepare_non_leaf_call(jit, asm); + + extern "C" { + fn rb_vm_opt_newarray_pack_buffer(ec: EcPtr, num: u32, elts: *const VALUE, fmt: VALUE, buffer: VALUE) -> VALUE; + } + + let values_opnd = asm.ctx.sp_opnd(-(num as i32)); + let values_ptr = asm.lea(values_opnd); + + let fmt_string = asm.ctx.sp_opnd(-(fmt_offset as i32)); + + let val_opnd = asm.ccall( + rb_vm_opt_newarray_pack_buffer as *const u8, + vec![ + EC, + (num - fmt_offset).into(), + values_ptr, + fmt_string, + match buffer { + None => Qundef.into(), + Some(i) => asm.ctx.sp_opnd(-(i as i32)), + }, + ], + ); + + asm.stack_pop(num.as_usize()); + let stack_ret = asm.stack_push(Type::CString); + asm.mov(stack_ret, val_opnd); + + Some(KeepCompiling) +} + +fn gen_opt_newarray_hash( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + + let num = jit.get_arg(0).as_u32(); + + // Save the PC and SP because we may call #hash + jit_prepare_non_leaf_call(jit, asm); + + extern "C" { + fn rb_vm_opt_newarray_hash(ec: EcPtr, num: u32, elts: *const VALUE) -> VALUE; + } + + let values_opnd = asm.ctx.sp_opnd(-(num as i32)); + let values_ptr = asm.lea(values_opnd); + + let val_opnd = asm.ccall( + rb_vm_opt_newarray_hash as *const u8, + vec![ + EC, + num.into(), + values_ptr + ], + ); + + asm.stack_pop(num.as_usize()); + let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, val_opnd); - KeepCompiling + Some(KeepCompiling) +} + +fn gen_opt_newarray_include_p( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + 
asm_comment!(asm, "opt_newarray_send include?"); + + let num = jit.get_arg(0).as_u32(); + + // Save the PC and SP because we may call customized methods. + jit_prepare_non_leaf_call(jit, asm); + + extern "C" { + fn rb_vm_opt_newarray_include_p(ec: EcPtr, num: u32, elts: *const VALUE, target: VALUE) -> VALUE; + } + + let values_opnd = asm.ctx.sp_opnd(-(num as i32)); + let values_ptr = asm.lea(values_opnd); + let target = asm.ctx.sp_opnd(-1); + + let val_opnd = asm.ccall( + rb_vm_opt_newarray_include_p as *const u8, + vec![ + EC, + (num - 1).into(), + values_ptr, + target + ], + ); + + asm.stack_pop(num.as_usize()); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val_opnd); + + Some(KeepCompiling) } fn gen_opt_newarray_min( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { - let num = jit_get_arg(jit, 0).as_u32(); + let num = jit.get_arg(0).as_u32(); - // Save the PC and SP because we may allocate - jit_prepare_routine_call(jit, ctx, asm); + // Save the PC and SP because we may call #min + jit_prepare_non_leaf_call(jit, asm); extern "C" { fn rb_vm_opt_newarray_min(ec: EcPtr, num: u32, elts: *const VALUE) -> VALUE; } - let offset_magnitude = (SIZEOF_VALUE as u32) * num; - let values_opnd = ctx.sp_opnd(-(offset_magnitude as isize)); + let values_opnd = asm.ctx.sp_opnd(-(num as i32)); let values_ptr = asm.lea(values_opnd); let val_opnd = asm.ccall( @@ -3411,55 +4514,45 @@ fn gen_opt_newarray_min( ], ); - ctx.stack_pop(num.as_usize()); - let stack_ret = ctx.stack_push(Type::Unknown); + asm.stack_pop(num.as_usize()); + let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, val_opnd); - KeepCompiling + Some(KeepCompiling) } fn gen_opt_not( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - return gen_opt_send_without_block(jit, ctx, asm, ocb); +) -> Option<CodegenStatus> { + return gen_opt_send_without_block(jit, asm); } fn gen_opt_size( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - return gen_opt_send_without_block(jit, ctx, asm, ocb); +) -> Option<CodegenStatus> { + return gen_opt_send_without_block(jit, asm); } fn gen_opt_length( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - return gen_opt_send_without_block(jit, ctx, asm, ocb); +) -> Option<CodegenStatus> { + return gen_opt_send_without_block(jit, asm); } fn gen_opt_regexpmatch2( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - return gen_opt_send_without_block(jit, ctx, asm, ocb); +) -> Option<CodegenStatus> { + return gen_opt_send_without_block(jit, asm); } fn gen_opt_case_dispatch( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Normally this instruction would lookup the key in a hash and jump to an // offset based on that. // Instead we can take the fallback case and continue with the next @@ -3467,19 +4560,17 @@ fn gen_opt_case_dispatch( // We'd hope that our jitted code will be sufficiently fast without the // hash lookup, at least for small hashes, but it's worth revisiting this // assumption in the future. 
- if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, asm, ocb); - return EndBlock; + if !jit.at_compile_target() { + return jit.defer_compilation(asm); } - let starting_context = ctx.clone(); - let case_hash = jit_get_arg(jit, 0); - let else_offset = jit_get_arg(jit, 1).as_u32(); + let case_hash = jit.get_arg(0); + let else_offset = jit.get_arg(1).as_u32(); // Try to reorder case/else branches so that ones that are actually used come first. // Supporting only Fixnum for now so that the implementation can be an equality check. - let key_opnd = ctx.stack_pop(1); - let comptime_key = jit_peek_at_stack(jit, ctx, 0); + let key_opnd = asm.stack_opnd(0); + let comptime_key = jit.peek_at_stack(&asm.ctx, 0); // Check that all cases are fixnums to avoid having to register BOP assumptions on // all the types that case hashes support. This spends compile time to save memory. @@ -3500,23 +4591,27 @@ fn gen_opt_case_dispatch( all_fixnum } - if comptime_key.fixnum_p() && comptime_key.0 <= u32::MAX.as_usize() && case_hash_all_fixnum_p(case_hash) { - if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_EQQ) { - return CantCompile; + // If megamorphic, fallback to compiling branch instructions after opt_case_dispatch + let megamorphic = asm.ctx.get_chain_depth() >= CASE_WHEN_MAX_DEPTH; + if megamorphic { + gen_counter_incr(jit, asm, Counter::num_opt_case_dispatch_megamorphic); + } + + if comptime_key.fixnum_p() && comptime_key.0 <= u32::MAX.as_usize() && case_hash_all_fixnum_p(case_hash) && !megamorphic { + if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, BOP_EQQ) { + return None; } // Check if the key is the same value asm.cmp(key_opnd, comptime_key.into()); - let side_exit = get_side_exit(jit, ocb, &starting_context); jit_chain_guard( JCC_JNE, jit, - &starting_context, asm, - ocb, CASE_WHEN_MAX_DEPTH, - side_exit, + Counter::opt_case_dispatch_megamorphic, ); + asm.stack_pop(1); // Pop key_opnd // Get the offset for the compile-time key let mut offset = 0; @@ -3529,51 +4624,29 @@ fn gen_opt_case_dispatch( }; // Jump to the offset of case or else - let jump_block = BlockId { iseq: jit.iseq, idx: jit_next_insn_idx(jit) + jump_offset }; - gen_direct_jump(jit, &ctx, jump_block, asm); - EndBlock + let jump_idx = jit.next_insn_idx() as u32 + jump_offset; + let jump_block = BlockId { iseq: jit.iseq, idx: jump_idx.try_into().unwrap() }; + gen_direct_jump(jit, &asm.ctx.clone(), jump_block, asm); + Some(EndBlock) } else { - KeepCompiling // continue with === branches - } -} - -fn gen_branchif_branch( - asm: &mut Assembler, - target0: CodePtr, - target1: Option<CodePtr>, - shape: BranchShape, -) { - assert!(target1 != None); - match shape { - BranchShape::Next0 => { - asm.jz(target1.unwrap().into()); - } - BranchShape::Next1 => { - asm.jnz(target0.into()); - } - BranchShape::Default => { - asm.jnz(target0.into()); - asm.jmp(target1.unwrap().into()); - } + asm.stack_pop(1); // Pop key_opnd + Some(KeepCompiling) // continue with === branches } } fn gen_branchif( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - let jump_offset = jit_get_arg(jit, 0).as_i32(); +) -> Option<CodegenStatus> { + let jump_offset = jit.get_arg(0).as_i32(); // Check for interrupts, but only on backward branches that may create loops if jump_offset < 0 { - let side_exit = get_side_exit(jit, ocb, ctx); - gen_check_ints(asm, side_exit); + gen_check_ints(asm, Counter::branchif_interrupted); } // Get the branch target instruction offsets - let 
next_idx = jit_next_insn_idx(jit); + let next_idx = jit.next_insn_idx(); let jump_idx = (next_idx as i32) + jump_offset; let next_block = BlockId { iseq: jit.iseq, @@ -3581,68 +4654,51 @@ fn gen_branchif( }; let jump_block = BlockId { iseq: jit.iseq, - idx: jump_idx as u32, + idx: jump_idx.try_into().unwrap(), }; // Test if any bit (outside of the Qnil bit) is on // See RB_TEST() - let val_type = ctx.get_opnd_type(StackOpnd(0)); - let val_opnd = ctx.stack_pop(1); + let val_type = asm.ctx.get_opnd_type(StackOpnd(0)); + let val_opnd = asm.stack_pop(1); + + incr_counter!(branch_insn_count); if let Some(result) = val_type.known_truthy() { let target = if result { jump_block } else { next_block }; - gen_direct_jump(jit, ctx, target, asm); + gen_direct_jump(jit, &asm.ctx.clone(), target, asm); + incr_counter!(branch_known_count); } else { asm.test(val_opnd, Opnd::Imm(!Qnil.as_i64())); // Generate the branch instructions - gen_branch( - jit, + let ctx = asm.ctx; + jit.gen_branch( asm, - ocb, jump_block, - ctx, + &ctx, Some(next_block), - Some(ctx), - gen_branchif_branch, + Some(&ctx), + BranchGenFn::BranchIf(Cell::new(BranchShape::Default)), ); } - EndBlock -} - -fn gen_branchunless_branch( - asm: &mut Assembler, - target0: CodePtr, - target1: Option<CodePtr>, - shape: BranchShape, -) { - match shape { - BranchShape::Next0 => asm.jnz(target1.unwrap().into()), - BranchShape::Next1 => asm.jz(target0.into()), - BranchShape::Default => { - asm.jz(target0.into()); - asm.jmp(target1.unwrap().into()); - } - } + Some(EndBlock) } fn gen_branchunless( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - let jump_offset = jit_get_arg(jit, 0).as_i32(); +) -> Option<CodegenStatus> { + let jump_offset = jit.get_arg(0).as_i32(); // Check for interrupts, but only on backward branches that may create loops if jump_offset < 0 { - let side_exit = get_side_exit(jit, ocb, ctx); - gen_check_ints(asm, side_exit); + gen_check_ints(asm, Counter::branchunless_interrupted); } // Get the branch target instruction offsets - let next_idx = jit_next_insn_idx(jit) as i32; + let next_idx = jit.next_insn_idx() as i32; let jump_idx = next_idx + jump_offset; let next_block = BlockId { iseq: jit.iseq, @@ -3653,12 +4709,15 @@ fn gen_branchunless( idx: jump_idx.try_into().unwrap(), }; - let val_type = ctx.get_opnd_type(StackOpnd(0)); - let val_opnd = ctx.stack_pop(1); + let val_type = asm.ctx.get_opnd_type(StackOpnd(0)); + let val_opnd = asm.stack_pop(1); + + incr_counter!(branch_insn_count); if let Some(result) = val_type.known_truthy() { let target = if result { next_block } else { jump_block }; - gen_direct_jump(jit, ctx, target, asm); + gen_direct_jump(jit, &asm.ctx.clone(), target, asm); + incr_counter!(branch_known_count); } else { // Test if any bit (outside of the Qnil bit) is on // See RB_TEST() @@ -3666,53 +4725,33 @@ fn gen_branchunless( asm.test(val_opnd, not_qnil.into()); // Generate the branch instructions - gen_branch( - jit, + let ctx = asm.ctx; + jit.gen_branch( asm, - ocb, jump_block, - ctx, + &ctx, Some(next_block), - Some(ctx), - gen_branchunless_branch, + Some(&ctx), + BranchGenFn::BranchUnless(Cell::new(BranchShape::Default)), ); } - EndBlock -} - -fn gen_branchnil_branch( - asm: &mut Assembler, - target0: CodePtr, - target1: Option<CodePtr>, - shape: BranchShape, -) { - match shape { - BranchShape::Next0 => asm.jne(target1.unwrap().into()), - BranchShape::Next1 => asm.je(target0.into()), - BranchShape::Default => { - asm.je(target0.into()); - 
asm.jmp(target1.unwrap().into()); - } - } + Some(EndBlock) } fn gen_branchnil( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - let jump_offset = jit_get_arg(jit, 0).as_i32(); +) -> Option<CodegenStatus> { + let jump_offset = jit.get_arg(0).as_i32(); // Check for interrupts, but only on backward branches that may create loops if jump_offset < 0 { - let side_exit = get_side_exit(jit, ocb, ctx); - gen_check_ints(asm, side_exit); + gen_check_ints(asm, Counter::branchnil_interrupted); } // Get the branch target instruction offsets - let next_idx = jit_next_insn_idx(jit) as i32; + let next_idx = jit.next_insn_idx() as i32; let jump_idx = next_idx + jump_offset; let next_block = BlockId { iseq: jit.iseq, @@ -3723,56 +4762,160 @@ fn gen_branchnil( idx: jump_idx.try_into().unwrap(), }; - let val_type = ctx.get_opnd_type(StackOpnd(0)); - let val_opnd = ctx.stack_pop(1); + let val_type = asm.ctx.get_opnd_type(StackOpnd(0)); + let val_opnd = asm.stack_pop(1); + + incr_counter!(branch_insn_count); if let Some(result) = val_type.known_nil() { let target = if result { jump_block } else { next_block }; - gen_direct_jump(jit, ctx, target, asm); + gen_direct_jump(jit, &asm.ctx.clone(), target, asm); + incr_counter!(branch_known_count); } else { // Test if the value is Qnil asm.cmp(val_opnd, Opnd::UImm(Qnil.into())); // Generate the branch instructions - gen_branch( - jit, + let ctx = asm.ctx; + jit.gen_branch( asm, - ocb, jump_block, - ctx, + &ctx, Some(next_block), - Some(ctx), - gen_branchnil_branch, + Some(&ctx), + BranchGenFn::BranchNil(Cell::new(BranchShape::Default)), ); } - EndBlock + Some(EndBlock) +} + +fn gen_throw( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + let throw_state = jit.get_arg(0).as_u64(); + let throwobj = asm.stack_pop(1); + let throwobj = asm.load(throwobj); + + // Gather some statistics about throw + gen_counter_incr(jit, asm, Counter::num_throw); + match (throw_state & VM_THROW_STATE_MASK as u64) as u32 { + RUBY_TAG_BREAK => gen_counter_incr(jit, asm, Counter::num_throw_break), + RUBY_TAG_RETRY => gen_counter_incr(jit, asm, Counter::num_throw_retry), + RUBY_TAG_RETURN => gen_counter_incr(jit, asm, Counter::num_throw_return), + _ => {}, + } + + // THROW_DATA_NEW allocates. Save SP for GC and PC for allocation tracing as + // well as handling the catch table. However, not using jit_prepare_call_with_gc + // since we don't need a patch point for this implementation. + jit_save_pc(jit, asm); + gen_save_sp(asm); + + // rb_vm_throw verifies it's a valid throw, sets ec->tag->state, and returns throw + // data, which is throwobj or a vm_throw_data wrapping it. When ec->tag->state is + // set, JIT code callers will handle the throw with vm_exec_handle_exception. 
+ extern "C" { + fn rb_vm_throw(ec: EcPtr, reg_cfp: CfpPtr, throw_state: u32, throwobj: VALUE) -> VALUE; + } + let val = asm.ccall(rb_vm_throw as *mut u8, vec![EC, CFP, throw_state.into(), throwobj]); + + asm_comment!(asm, "exit from throw"); + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); + + asm.frame_teardown(); + + asm.cret(val); + Some(EndBlock) +} + +fn gen_opt_new( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + let cd = jit.get_arg(0).as_ptr(); + let jump_offset = jit.get_arg(1).as_i32(); + + if !jit.at_compile_target() { + return jit.defer_compilation(asm); + } + + let ci = unsafe { get_call_data_ci(cd) }; // info about the call site + let mid = unsafe { vm_ci_mid(ci) }; + let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap(); + + let recv_idx = argc; + let comptime_recv = jit.peek_at_stack(&asm.ctx, recv_idx as isize); + + // This is a singleton class + let comptime_recv_klass = comptime_recv.class_of(); + + let recv = asm.stack_opnd(recv_idx); + + perf_call!("opt_new: ", jit_guard_known_klass( + jit, + asm, + recv, + recv.into(), + comptime_recv, + SEND_MAX_DEPTH, + Counter::guard_send_klass_megamorphic, + )); + + // We now know that it's always comptime_recv_klass + if jit.assume_expected_cfunc(asm, comptime_recv_klass, mid, rb_class_new_instance_pass_kw as _) { + // Fast path + // call rb_class_alloc to actually allocate + jit_prepare_non_leaf_call(jit, asm); + let obj = asm.ccall(rb_obj_alloc as _, vec![comptime_recv.into()]); + + // Get a reference to the stack location where we need to save the + // return instance. + let result = asm.stack_opnd(recv_idx + 1); + let recv = asm.stack_opnd(recv_idx); + + // Replace the receiver for the upcoming initialize call + asm.ctx.set_opnd_mapping(recv.into(), TempMapping::MapToStack(Type::UnknownHeap)); + asm.mov(recv, obj); + + // Save the allocated object for return + asm.ctx.set_opnd_mapping(result.into(), TempMapping::MapToStack(Type::UnknownHeap)); + asm.mov(result, obj); + + jump_to_next_insn(jit, asm) + } else { + // general case + + // Get the branch target instruction offsets + let jump_idx = jit.next_insn_idx() as i32 + jump_offset; + return end_block_with_jump(jit, asm, jump_idx as u16); + } } fn gen_jump( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - let jump_offset = jit_get_arg(jit, 0).as_i32(); +) -> Option<CodegenStatus> { + let jump_offset = jit.get_arg(0).as_i32(); // Check for interrupts, but only on backward branches that may create loops if jump_offset < 0 { - let side_exit = get_side_exit(jit, ocb, ctx); - gen_check_ints(asm, side_exit); + gen_check_ints(asm, Counter::jump_interrupted); } // Get the branch target instruction offsets - let jump_idx = (jit_next_insn_idx(jit) as i32) + jump_offset; + let jump_idx = jit.next_insn_idx() as i32 + jump_offset; let jump_block = BlockId { iseq: jit.iseq, - idx: jump_idx as u32, + idx: jump_idx.try_into().unwrap(), }; // Generate the jump instruction - gen_direct_jump(jit, ctx, jump_block, asm); + gen_direct_jump(jit, &asm.ctx.clone(), jump_block, asm); - EndBlock + Some(EndBlock) } /// Guard that self or a stack operand has the same class as `known_klass`, using @@ -3783,60 +4926,68 @@ fn gen_jump( /// Recompile as contingency if possible, or take side exit a last resort. 
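// The guards below classify the receiver by the tag bits of its VALUE word before
// falling back to loading RBASIC_CLASS. The following is a rough standalone
// illustration of that classification; the tag constants are the conventional
// 64-bit, flonum-enabled CRuby values, assumed here rather than taken from this
// diff, and the function itself is not part of YJIT.

fn classify_value(v: u64) -> &'static str {
    // Tag constants assumed (typical 64-bit CRuby with flonums enabled).
    const RUBY_FIXNUM_FLAG: u64 = 0x1;   // low bit set => fixnum
    const RUBY_FLONUM_MASK: u64 = 0x3;
    const RUBY_FLONUM_FLAG: u64 = 0x2;   // low two bits == 10 => flonum
    const RUBY_SYMBOL_FLAG: u64 = 0x0c;  // low byte == 0x0c => static symbol
    const RUBY_IMMEDIATE_MASK: u64 = 0x7;
    const QFALSE: u64 = 0x00;
    const QNIL: u64 = 0x04;
    const QTRUE: u64 = 0x14;

    if v == QNIL { "nil" }
    else if v == QTRUE { "true" }
    else if v == QFALSE { "false" }
    else if v & RUBY_FIXNUM_FLAG != 0 { "fixnum" }
    else if v & RUBY_FLONUM_MASK == RUBY_FLONUM_FLAG { "flonum" }
    else if v & 0xff == RUBY_SYMBOL_FLAG { "static symbol" }
    else if v & RUBY_IMMEDIATE_MASK == 0 { "heap object (class read from RBASIC_CLASS)" }
    else { "other immediate" }
}

fn main() {
    assert_eq!(classify_value((42 << 1) | 1), "fixnum");
    assert_eq!(classify_value(0x04), "nil");
}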
fn jit_guard_known_klass( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, - known_klass: VALUE, obj_opnd: Opnd, insn_opnd: YARVOpnd, sample_instance: VALUE, - max_chain_depth: i32, - side_exit: Target, + max_chain_depth: u8, + counter: Counter, ) { - let val_type = ctx.get_opnd_type(insn_opnd); + let known_klass = sample_instance.class_of(); + let val_type = asm.ctx.get_opnd_type(insn_opnd); if val_type.known_class() == Some(known_klass) { - // We already know from type information that this is a match - return; + // Unless frozen, Array, Hash, and String objects may change their RBASIC_CLASS + // when they get a singleton class. Those types need invalidations. + if unsafe { [rb_cArray, rb_cHash, rb_cString].contains(&known_klass) } { + if jit.assume_no_singleton_class(asm, known_klass) { + // Speculate that this object will not have a singleton class, + // and invalidate the block in case it does. + return; + } + } else { + // We already know from type information that this is a match + return; + } } if unsafe { known_klass == rb_cNilClass } { assert!(!val_type.is_heap()); assert!(val_type.is_unknown()); - asm.comment("guard object is nil"); + asm_comment!(asm, "guard object is nil"); asm.cmp(obj_opnd, Qnil.into()); - jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); + jit_chain_guard(JCC_JNE, jit, asm, max_chain_depth, counter); - ctx.upgrade_opnd_type(insn_opnd, Type::Nil); + asm.ctx.upgrade_opnd_type(insn_opnd, Type::Nil); } else if unsafe { known_klass == rb_cTrueClass } { assert!(!val_type.is_heap()); assert!(val_type.is_unknown()); - asm.comment("guard object is true"); + asm_comment!(asm, "guard object is true"); asm.cmp(obj_opnd, Qtrue.into()); - jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); + jit_chain_guard(JCC_JNE, jit, asm, max_chain_depth, counter); - ctx.upgrade_opnd_type(insn_opnd, Type::True); + asm.ctx.upgrade_opnd_type(insn_opnd, Type::True); } else if unsafe { known_klass == rb_cFalseClass } { assert!(!val_type.is_heap()); assert!(val_type.is_unknown()); - asm.comment("guard object is false"); + asm_comment!(asm, "guard object is false"); assert!(Qfalse.as_i32() == 0); asm.test(obj_opnd, obj_opnd); - jit_chain_guard(JCC_JNZ, jit, ctx, asm, ocb, max_chain_depth, side_exit); + jit_chain_guard(JCC_JNZ, jit, asm, max_chain_depth, counter); - ctx.upgrade_opnd_type(insn_opnd, Type::False); + asm.ctx.upgrade_opnd_type(insn_opnd, Type::False); } else if unsafe { known_klass == rb_cInteger } && sample_instance.fixnum_p() { // We will guard fixnum and bignum as though they were separate classes // BIGNUM can be handled by the general else case below assert!(val_type.is_unknown()); - asm.comment("guard object is fixnum"); + asm_comment!(asm, "guard object is fixnum"); asm.test(obj_opnd, Opnd::Imm(RUBY_FIXNUM_FLAG as i64)); - jit_chain_guard(JCC_JZ, jit, ctx, asm, ocb, max_chain_depth, side_exit); - ctx.upgrade_opnd_type(insn_opnd, Type::Fixnum); + jit_chain_guard(JCC_JZ, jit, asm, max_chain_depth, counter); + asm.ctx.upgrade_opnd_type(insn_opnd, Type::Fixnum); } else if unsafe { known_klass == rb_cSymbol } && sample_instance.static_sym_p() { assert!(!val_type.is_heap()); // We will guard STATIC vs DYNAMIC as though they were separate classes @@ -3844,11 +4995,11 @@ fn jit_guard_known_klass( if val_type != Type::ImmSymbol || !val_type.is_imm() { assert!(val_type.is_unknown()); - asm.comment("guard object is static symbol"); + asm_comment!(asm, "guard object is static symbol"); 
assert!(RUBY_SPECIAL_SHIFT == 8); asm.cmp(obj_opnd.with_num_bits(8).unwrap(), Opnd::UImm(RUBY_SYMBOL_FLAG as u64)); - jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); - ctx.upgrade_opnd_type(insn_opnd, Type::ImmSymbol); + jit_chain_guard(JCC_JNE, jit, asm, max_chain_depth, counter); + asm.ctx.upgrade_opnd_type(insn_opnd, Type::ImmSymbol); } } else if unsafe { known_klass == rb_cFloat } && sample_instance.flonum_p() { assert!(!val_type.is_heap()); @@ -3856,15 +5007,16 @@ fn jit_guard_known_klass( assert!(val_type.is_unknown()); // We will guard flonum vs heap float as though they were separate classes - asm.comment("guard object is flonum"); + asm_comment!(asm, "guard object is flonum"); let flag_bits = asm.and(obj_opnd, Opnd::UImm(RUBY_FLONUM_MASK as u64)); asm.cmp(flag_bits, Opnd::UImm(RUBY_FLONUM_FLAG as u64)); - jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); - ctx.upgrade_opnd_type(insn_opnd, Type::Flonum); + jit_chain_guard(JCC_JNE, jit, asm, max_chain_depth, counter); + asm.ctx.upgrade_opnd_type(insn_opnd, Type::Flonum); } } else if unsafe { FL_TEST(known_klass, VALUE(RUBY_FL_SINGLETON as usize)) != VALUE(0) - && sample_instance == rb_attr_get(known_klass, id__attached__ as ID) + && sample_instance == rb_class_attached_object(known_klass) + && !rb_obj_is_kind_of(sample_instance, rb_cIO).test() } { // Singleton classes are attached to one specific object, so we can // avoid one memory access (and potentially the is_heap check) by @@ -3876,46 +5028,50 @@ fn jit_guard_known_klass( // that its singleton class is empty, so we can't avoid the memory // access. As an example, `Object.new.singleton_class` is an object in // this situation. - asm.comment("guard known object with singleton class"); + // Also, guarding by identity is incorrect for IO objects because + // IO#reopen can be used to change the class and singleton class of IO objects! + asm_comment!(asm, "guard known object with singleton class"); asm.cmp(obj_opnd, sample_instance.into()); - jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); + jit_chain_guard(JCC_JNE, jit, asm, max_chain_depth, counter); } else if val_type == Type::CString && unsafe { known_klass == rb_cString } { // guard elided because the context says we've already checked unsafe { assert_eq!(sample_instance.class_of(), rb_cString, "context says class is exactly ::String") }; } else { - assert!(!val_type.is_imm()); + assert!(!val_type.is_imm(), "{insn_opnd:?} should be a heap object, but was {val_type:?} for {sample_instance:?}"); // Check that the receiver is a heap object // Note: if we get here, the class doesn't have immediate instances. if !val_type.is_heap() { - asm.comment("guard not immediate"); + asm_comment!(asm, "guard not immediate"); asm.test(obj_opnd, (RUBY_IMMEDIATE_MASK as u64).into()); - jit_chain_guard(JCC_JNZ, jit, ctx, asm, ocb, max_chain_depth, side_exit); + jit_chain_guard(JCC_JNZ, jit, asm, max_chain_depth, counter); asm.cmp(obj_opnd, Qfalse.into()); - jit_chain_guard(JCC_JE, jit, ctx, asm, ocb, max_chain_depth, side_exit); + jit_chain_guard(JCC_JE, jit, asm, max_chain_depth, counter); - ctx.upgrade_opnd_type(insn_opnd, Type::UnknownHeap); + asm.ctx.upgrade_opnd_type(insn_opnd, Type::UnknownHeap); } // If obj_opnd isn't already a register, load it. let obj_opnd = match obj_opnd { - Opnd::Reg(_) => obj_opnd, + Opnd::InsnOut { .. 
} => obj_opnd, _ => asm.load(obj_opnd), }; let klass_opnd = Opnd::mem(64, obj_opnd, RUBY_OFFSET_RBASIC_KLASS); // Bail if receiver class is different from known_klass // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the class. - asm.comment("guard known class"); + asm_comment!(asm, "guard known class"); asm.cmp(klass_opnd, known_klass.into()); - jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); + jit_chain_guard(JCC_JNE, jit, asm, max_chain_depth, counter); if known_klass == unsafe { rb_cString } { - ctx.upgrade_opnd_type(insn_opnd, Type::CString); + asm.ctx.upgrade_opnd_type(insn_opnd, Type::CString); } else if known_klass == unsafe { rb_cArray } { - ctx.upgrade_opnd_type(insn_opnd, Type::CArray); + asm.ctx.upgrade_opnd_type(insn_opnd, Type::CArray); + } else if known_klass == unsafe { rb_cHash } { + asm.ctx.upgrade_opnd_type(insn_opnd, Type::CHash); } } } @@ -3923,11 +5079,8 @@ fn jit_guard_known_klass( // Generate ancestry guard for protected callee. // Calls to protected callees only go through when self.is_a?(klass_that_defines_the_callee). fn jit_protected_callee_ancestry_guard( - _jit: &mut JITState, asm: &mut Assembler, - ocb: &mut OutlinedCb, cme: *const rb_callable_method_entry_t, - side_exit: Target, ) { // See vm_call_method(). let def_class = unsafe { (*cme).defined_class }; @@ -3942,7 +5095,7 @@ fn jit_protected_callee_ancestry_guard( ], ); asm.test(val, val); - asm.jz(counted_exit!(ocb, side_exit, send_se_protected_check_failed)) + asm.jz(Target::side_exit(Counter::guard_send_se_protected_check_failed)) } // Codegen for rb_obj_not(). @@ -3950,29 +5103,27 @@ fn jit_protected_callee_ancestry_guard( // arity guards. fn jit_rb_obj_not( _jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, - _block: Option<IseqPtr>, + _block: Option<BlockHandler>, _argc: i32, - _known_recv_class: *const VALUE, + _known_recv_class: Option<VALUE>, ) -> bool { - let recv_opnd = ctx.get_opnd_type(StackOpnd(0)); + let recv_opnd = asm.ctx.get_opnd_type(StackOpnd(0)); match recv_opnd.known_truthy() { Some(false) => { - asm.comment("rb_obj_not(nil_or_false)"); - ctx.stack_pop(1); - let out_opnd = ctx.stack_push(Type::True); + asm_comment!(asm, "rb_obj_not(nil_or_false)"); + asm.stack_pop(1); + let out_opnd = asm.stack_push(Type::True); asm.mov(out_opnd, Qtrue.into()); }, Some(true) => { // Note: recv_opnd != Type::Nil && recv_opnd != Type::False. - asm.comment("rb_obj_not(truthy)"); - ctx.stack_pop(1); - let out_opnd = ctx.stack_push(Type::False); + asm_comment!(asm, "rb_obj_not(truthy)"); + asm.stack_pop(1); + let out_opnd = asm.stack_push(Type::False); asm.mov(out_opnd, Qfalse.into()); }, _ => { @@ -3986,18 +5137,16 @@ fn jit_rb_obj_not( // Codegen for rb_true() fn jit_rb_true( _jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, - _block: Option<IseqPtr>, + _block: Option<BlockHandler>, _argc: i32, - _known_recv_class: *const VALUE, + _known_recv_class: Option<VALUE>, ) -> bool { - asm.comment("nil? == true"); - ctx.stack_pop(1); - let stack_ret = ctx.stack_push(Type::True); + asm_comment!(asm, "nil? 
== true"); + asm.stack_pop(1); + let stack_ret = asm.stack_push(Type::True); asm.mov(stack_ret, Qtrue.into()); true } @@ -4005,104 +5154,719 @@ fn jit_rb_true( // Codegen for rb_false() fn jit_rb_false( _jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, - _block: Option<IseqPtr>, + _block: Option<BlockHandler>, _argc: i32, - _known_recv_class: *const VALUE, + _known_recv_class: Option<VALUE>, ) -> bool { - asm.comment("nil? == false"); - ctx.stack_pop(1); - let stack_ret = ctx.stack_push(Type::False); + asm_comment!(asm, "nil? == false"); + asm.stack_pop(1); + let stack_ret = asm.stack_push(Type::False); asm.mov(stack_ret, Qfalse.into()); true } +/// Codegen for Kernel#is_a? +fn jit_rb_kernel_is_a( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + known_recv_class: Option<VALUE>, +) -> bool { + if argc != 1 { + return false; + } + + // If this is a super call we might not know the class + if known_recv_class.is_none() { + return false; + } + + // Important note: The output code will simply `return true/false`. + // Correctness follows from: + // - `known_recv_class` implies there is a guard scheduled before here + // for a particular `CLASS_OF(lhs)`. + // - We guard that rhs is identical to the compile-time sample + // - In general, for any two Class instances A, B, `A < B` does not change at runtime. + // Class#superclass is stable. + + let sample_rhs = jit.peek_at_stack(&asm.ctx, 0); + let sample_lhs = jit.peek_at_stack(&asm.ctx, 1); + + // We are not allowing module here because the module hierarchy can change at runtime. + if !unsafe { RB_TYPE_P(sample_rhs, RUBY_T_CLASS) } { + return false; + } + let sample_is_a = unsafe { rb_obj_is_kind_of(sample_lhs, sample_rhs) == Qtrue }; + + asm_comment!(asm, "Kernel#is_a?"); + asm.cmp(asm.stack_opnd(0), sample_rhs.into()); + asm.jne(Target::side_exit(Counter::guard_send_is_a_class_mismatch)); + + asm.stack_pop(2); + + if sample_is_a { + let stack_ret = asm.stack_push(Type::True); + asm.mov(stack_ret, Qtrue.into()); + } else { + let stack_ret = asm.stack_push(Type::False); + asm.mov(stack_ret, Qfalse.into()); + } + return true; +} + +/// Codegen for Kernel#instance_of? +fn jit_rb_kernel_instance_of( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + known_recv_class: Option<VALUE>, +) -> bool { + if argc != 1 { + return false; + } + + // If this is a super call we might not know the class + if known_recv_class.is_none() { + return false; + } + + // Important note: The output code will simply `return true/false`. + // Correctness follows from: + // - `known_recv_class` implies there is a guard scheduled before here + // for a particular `CLASS_OF(lhs)`. + // - We guard that rhs is identical to the compile-time sample + // - For a particular `CLASS_OF(lhs)`, `rb_obj_class(lhs)` does not change. + // (because for any singleton class `s`, `s.superclass.equal?(s.attached_object.class)`) + + let sample_rhs = jit.peek_at_stack(&asm.ctx, 0); + let sample_lhs = jit.peek_at_stack(&asm.ctx, 1); + + // Filters out cases where the C implementation raises + if unsafe { !(RB_TYPE_P(sample_rhs, RUBY_T_CLASS) || RB_TYPE_P(sample_rhs, RUBY_T_MODULE)) } { + return false; + } + + // We need to grab the class here to deal with singleton classes. 
+ // Instance of grabs the "real class" of the object rather than the + // singleton class. + let sample_lhs_real_class = unsafe { rb_obj_class(sample_lhs) }; + + let sample_instance_of = sample_lhs_real_class == sample_rhs; + + asm_comment!(asm, "Kernel#instance_of?"); + asm.cmp(asm.stack_opnd(0), sample_rhs.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + SEND_MAX_DEPTH, + Counter::guard_send_instance_of_class_mismatch, + ); + + asm.stack_pop(2); + + if sample_instance_of { + let stack_ret = asm.stack_push(Type::True); + asm.mov(stack_ret, Qtrue.into()); + } else { + let stack_ret = asm.stack_push(Type::False); + asm.mov(stack_ret, Qfalse.into()); + } + return true; +} + +fn jit_rb_mod_eqq( + _jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if argc != 1 { + return false; + } + + asm_comment!(asm, "Module#==="); + // By being here, we know that the receiver is a T_MODULE or a T_CLASS, because Module#=== can + // only live on these objects. With that, we can call rb_obj_is_kind_of() without + // jit_prepare_non_leaf_call() or a control frame push because it can't raise, allocate, or call + // Ruby methods with these inputs. + // Note the difference in approach from Kernel#is_a? because we don't get a free guard for the + // right hand side. + let rhs = asm.stack_pop(1); + let lhs = asm.stack_pop(1); // the module + let ret = asm.ccall(rb_obj_is_kind_of as *const u8, vec![rhs, lhs]); + + // Return the result + let stack_ret = asm.stack_push(Type::UnknownImm); + asm.mov(stack_ret, ret); + + return true; +} + +// Substitution for rb_mod_name(). Returns the name of a module/class. +fn jit_rb_mod_name( + _jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if argc != 0 { + return false; + } + + asm_comment!(asm, "Module#name"); + + // rb_mod_name() never allocates, so no preparation needed. 
+ let name = asm.ccall(rb_mod_name as _, vec![asm.stack_opnd(0)]); + + let _ = asm.stack_pop(1); // pop self + // call-seq: mod.name -> string or nil + let ret = asm.stack_push(Type::Unknown); + asm.mov(ret, name); + + true +} + // Codegen for rb_obj_equal() // object identity comparison fn jit_rb_obj_equal( _jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, - _block: Option<IseqPtr>, + _block: Option<BlockHandler>, _argc: i32, - _known_recv_class: *const VALUE, + _known_recv_class: Option<VALUE>, ) -> bool { - asm.comment("equal?"); - let obj1 = ctx.stack_pop(1); - let obj2 = ctx.stack_pop(1); + asm_comment!(asm, "equal?"); + let obj1 = asm.stack_pop(1); + let obj2 = asm.stack_pop(1); asm.cmp(obj1, obj2); let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into()); - let stack_ret = ctx.stack_push(Type::UnknownImm); + let stack_ret = asm.stack_push(Type::UnknownImm); asm.mov(stack_ret, ret_opnd); true } +// Codegen for rb_obj_not_equal() +// object identity comparison +fn jit_rb_obj_not_equal( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + gen_equality_specialized(jit, asm, false) == Some(true) +} + // Codegen for rb_int_equal() fn jit_rb_int_equal( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, - _block: Option<IseqPtr>, + _block: Option<BlockHandler>, _argc: i32, - _known_recv_class: *const VALUE, + _known_recv_class: Option<VALUE>, ) -> bool { - let side_exit = get_side_exit(jit, ocb, ctx); - // Check that both operands are fixnums - guard_two_fixnums(jit, ctx, asm, ocb, side_exit); + guard_two_fixnums(jit, asm); // Compare the arguments - asm.comment("rb_int_equal"); - let arg1 = ctx.stack_pop(1); - let arg0 = ctx.stack_pop(1); + asm_comment!(asm, "rb_int_equal"); + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); asm.cmp(arg0, arg1); let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into()); - let stack_ret = ctx.stack_push(Type::UnknownImm); + let stack_ret = asm.stack_push(Type::UnknownImm); asm.mov(stack_ret, ret_opnd); true } -/// If string is frozen, duplicate it to get a non-frozen string. Otherwise, return it. 
-fn jit_rb_str_uplus( +fn jit_rb_int_succ( _jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, - _block: Option<IseqPtr>, + _block: Option<BlockHandler>, _argc: i32, - _known_recv_class: *const VALUE, + _known_recv_class: Option<VALUE>, +) -> bool { + // Guard the receiver is fixnum + let recv_type = asm.ctx.get_opnd_type(StackOpnd(0)); + let recv = asm.stack_pop(1); + if recv_type != Type::Fixnum { + asm_comment!(asm, "guard object is fixnum"); + asm.test(recv, Opnd::Imm(RUBY_FIXNUM_FLAG as i64)); + asm.jz(Target::side_exit(Counter::opt_succ_not_fixnum)); + } + + asm_comment!(asm, "Integer#succ"); + let out_val = asm.add(recv, Opnd::Imm(2)); // 2 is untagged Fixnum 1 + asm.jo(Target::side_exit(Counter::opt_succ_overflow)); + + // Push the output onto the stack + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, out_val); + + true +} + +fn jit_rb_int_pred( + _jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Guard the receiver is fixnum + let recv_type = asm.ctx.get_opnd_type(StackOpnd(0)); + let recv = asm.stack_pop(1); + if recv_type != Type::Fixnum { + asm_comment!(asm, "guard object is fixnum"); + asm.test(recv, Opnd::Imm(RUBY_FIXNUM_FLAG as i64)); + asm.jz(Target::side_exit(Counter::send_pred_not_fixnum)); + } + + asm_comment!(asm, "Integer#pred"); + let out_val = asm.sub(recv, Opnd::Imm(2)); // 2 is untagged Fixnum 1 + asm.jo(Target::side_exit(Counter::send_pred_underflow)); + + // Push the output onto the stack + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, out_val); + + true +} + +fn jit_rb_int_div( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if asm.ctx.two_fixnums_on_stack(jit) != Some(true) { + return false; + } + guard_two_fixnums(jit, asm); + + // rb_fix_div_fix may GC-allocate for Bignum + jit_prepare_call_with_gc(jit, asm); + + asm_comment!(asm, "Integer#/"); + let obj = asm.stack_opnd(0); + let recv = asm.stack_opnd(1); + + // Check for arg0 % 0 + asm.cmp(obj, VALUE::fixnum_from_usize(0).as_i64().into()); + asm.je(Target::side_exit(Counter::opt_div_zero)); + + let ret = asm.ccall(rb_fix_div_fix as *const u8, vec![recv, obj]); + asm.stack_pop(2); // Keep them during ccall for GC + + let ret_opnd = asm.stack_push(Type::Unknown); + asm.mov(ret_opnd, ret); + true +} + +fn jit_rb_int_lshift( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if asm.ctx.two_fixnums_on_stack(jit) != Some(true) { + return false; + } + guard_two_fixnums(jit, asm); + + let comptime_shift = jit.peek_at_stack(&asm.ctx, 0); + + if !comptime_shift.fixnum_p() { + return false; + } + + // Untag the fixnum shift amount + let shift_amt = comptime_shift.as_isize() >> 1; + if shift_amt > 63 || shift_amt < 0 { + return false; + } + + // Fallback to a C call if the shift amount varies + // This check is needed because the chain guard will side-exit + // if its max depth is reached + if asm.ctx.get_chain_depth() > 0 { + return false; + } + + let rhs = asm.stack_pop(1); + let lhs = asm.stack_pop(1); + + // Guard on 
the shift amount we speculated on + asm.cmp(rhs, comptime_shift.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + 1, + Counter::lshift_amount_changed, + ); + + fixnum_left_shift_body(asm, lhs, shift_amt as u64); + true +} + +fn fixnum_left_shift_body(asm: &mut Assembler, lhs: Opnd, shift_amt: u64) { + let in_val = asm.sub(lhs, 1.into()); + let shift_opnd = Opnd::UImm(shift_amt); + let out_val = asm.lshift(in_val, shift_opnd); + let unshifted = asm.rshift(out_val, shift_opnd); + + // Guard that we did not overflow + asm.cmp(unshifted, in_val); + asm.jne(Target::side_exit(Counter::lshift_overflow)); + + // Re-tag the output value + let out_val = asm.add(out_val, 1.into()); + + let ret_opnd = asm.stack_push(Type::Fixnum); + asm.mov(ret_opnd, out_val); +} + +fn jit_rb_int_rshift( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if asm.ctx.two_fixnums_on_stack(jit) != Some(true) { + return false; + } + guard_two_fixnums(jit, asm); + + let comptime_shift = jit.peek_at_stack(&asm.ctx, 0); + + // Untag the fixnum shift amount + let shift_amt = comptime_shift.as_isize() >> 1; + if shift_amt > 63 || shift_amt < 0 { + return false; + } + + // Fallback to a C call if the shift amount varies + // This check is needed because the chain guard will side-exit + // if its max depth is reached + if asm.ctx.get_chain_depth() > 0 { + return false; + } + + let rhs = asm.stack_pop(1); + let lhs = asm.stack_pop(1); + + // Guard on the shift amount we speculated on + asm.cmp(rhs, comptime_shift.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + 1, + Counter::rshift_amount_changed, + ); + + let shift_opnd = Opnd::UImm(shift_amt as u64); + let out_val = asm.rshift(lhs, shift_opnd); + let out_val = asm.or(out_val, 1.into()); + + let ret_opnd = asm.stack_push(Type::Fixnum); + asm.mov(ret_opnd, out_val); + true +} + +fn jit_rb_int_xor( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if asm.ctx.two_fixnums_on_stack(jit) != Some(true) { + return false; + } + guard_two_fixnums(jit, asm); + + let rhs = asm.stack_pop(1); + let lhs = asm.stack_pop(1); + + // XOR and then re-tag the resulting fixnum + let out_val = asm.xor(lhs, rhs); + let out_val = asm.or(out_val, 1.into()); + + let ret_opnd = asm.stack_push(Type::Fixnum); + asm.mov(ret_opnd, out_val); + true +} + +fn jit_rb_int_aref( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if argc != 1 { + return false; + } + if asm.ctx.two_fixnums_on_stack(jit) != Some(true) { + return false; + } + guard_two_fixnums(jit, asm); + + asm_comment!(asm, "Integer#[]"); + let obj = asm.stack_pop(1); + let recv = asm.stack_pop(1); + + let ret = asm.ccall(rb_fix_aref as *const u8, vec![recv, obj]); + + let ret_opnd = asm.stack_push(Type::Fixnum); + asm.mov(ret_opnd, ret); + true +} + +fn jit_rb_float_plus( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin + let 
comptime_obj = jit.peek_at_stack(&asm.ctx, 0); + if comptime_obj.fixnum_p() || comptime_obj.flonum_p() { + let obj = asm.stack_opnd(0); + jit_guard_known_klass( + jit, + asm, + obj, + obj.into(), + comptime_obj, + SEND_MAX_DEPTH, + Counter::guard_send_not_fixnum_or_flonum, + ); + } else { + return false; + } + + // Save the PC and SP because the callee may allocate Float on heap + jit_prepare_call_with_gc(jit, asm); + + asm_comment!(asm, "Float#+"); + let obj = asm.stack_opnd(0); + let recv = asm.stack_opnd(1); + + let ret = asm.ccall(rb_float_plus as *const u8, vec![recv, obj]); + asm.stack_pop(2); // Keep recv during ccall for GC + + let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float + asm.mov(ret_opnd, ret); + true +} + +fn jit_rb_float_minus( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin + let comptime_obj = jit.peek_at_stack(&asm.ctx, 0); + if comptime_obj.fixnum_p() || comptime_obj.flonum_p() { + let obj = asm.stack_opnd(0); + jit_guard_known_klass( + jit, + asm, + obj, + obj.into(), + comptime_obj, + SEND_MAX_DEPTH, + Counter::guard_send_not_fixnum_or_flonum, + ); + } else { + return false; + } + + // Save the PC and SP because the callee may allocate Float on heap + jit_prepare_call_with_gc(jit, asm); + + asm_comment!(asm, "Float#-"); + let obj = asm.stack_opnd(0); + let recv = asm.stack_opnd(1); + + let ret = asm.ccall(rb_float_minus as *const u8, vec![recv, obj]); + asm.stack_pop(2); // Keep recv during ccall for GC + + let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float + asm.mov(ret_opnd, ret); + true +} + +fn jit_rb_float_mul( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin + let comptime_obj = jit.peek_at_stack(&asm.ctx, 0); + if comptime_obj.fixnum_p() || comptime_obj.flonum_p() { + let obj = asm.stack_opnd(0); + jit_guard_known_klass( + jit, + asm, + obj, + obj.into(), + comptime_obj, + SEND_MAX_DEPTH, + Counter::guard_send_not_fixnum_or_flonum, + ); + } else { + return false; + } + + // Save the PC and SP because the callee may allocate Float on heap + jit_prepare_call_with_gc(jit, asm); + + asm_comment!(asm, "Float#*"); + let obj = asm.stack_opnd(0); + let recv = asm.stack_opnd(1); + + let ret = asm.ccall(rb_float_mul as *const u8, vec![recv, obj]); + asm.stack_pop(2); // Keep recv during ccall for GC + + let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float + asm.mov(ret_opnd, ret); + true +} + +fn jit_rb_float_div( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin + let comptime_obj = jit.peek_at_stack(&asm.ctx, 0); + if comptime_obj.fixnum_p() || comptime_obj.flonum_p() { + let obj = asm.stack_opnd(0); + jit_guard_known_klass( + jit, + asm, + obj, + obj.into(), + comptime_obj, + SEND_MAX_DEPTH, + Counter::guard_send_not_fixnum_or_flonum, + ); + } else { + return false; + } + + // Save the PC and SP because the callee 
may allocate Float on heap
+ jit_prepare_call_with_gc(jit, asm);
+
+ asm_comment!(asm, "Float#/");
+ let obj = asm.stack_opnd(0);
+ let recv = asm.stack_opnd(1);
+
+ let ret = asm.ccall(rb_float_div as *const u8, vec![recv, obj]);
+ asm.stack_pop(2); // Keep recv during ccall for GC
+
+ let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float
+ asm.mov(ret_opnd, ret);
+ true
+}
+
+/// If string is frozen, duplicate it to get a non-frozen string. Otherwise, return it.
+fn jit_rb_str_uplus(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ argc: i32,
+ _known_recv_class: Option<VALUE>,
) -> bool {
- asm.comment("Unary plus on string");
- let recv_opnd = asm.load(ctx.stack_pop(1));
+ if argc != 0 {
+ return false;
+ }
+
+ // We allocate when we dup the string
+ jit_prepare_call_with_gc(jit, asm);
+ asm.spill_regs(); // For ccall. Unconditionally spill them for RegMappings consistency.
+
+ asm_comment!(asm, "Unary plus on string");
+ let recv_opnd = asm.stack_pop(1);
+ let recv_opnd = asm.load(recv_opnd);
let flags_opnd = asm.load(Opnd::mem(64, recv_opnd, RUBY_OFFSET_RBASIC_FLAGS));
- asm.test(flags_opnd, Opnd::Imm(RUBY_FL_FREEZE as i64));
+ asm.test(flags_opnd, Opnd::Imm(RUBY_FL_FREEZE as i64 | RSTRING_CHILLED as i64));
let ret_label = asm.new_label("stack_ret");
- // We guard for the receiver being a ::String, so the return value is too
- let stack_ret = ctx.stack_push(Type::CString);
+ // String#+@ can only exist on T_STRING
+ let stack_ret = asm.stack_push(Type::TString);
// If the string isn't frozen, we just return it.
asm.mov(stack_ret, recv_opnd);
asm.jz(ret_label);
// Str is frozen - duplicate it
+ asm.spill_regs(); // for ccall
let ret_opnd = asm.ccall(rb_str_dup as *const u8, vec![recv_opnd]);
asm.mov(stack_ret, ret_opnd);
@@ -4111,23 +5875,272 @@ fn jit_rb_str_uplus(
true
}
+fn jit_rb_str_length(
+ _jit: &mut JITState,
+ asm: &mut Assembler,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ asm_comment!(asm, "String#length");
+ extern "C" {
+ fn rb_str_length(str: VALUE) -> VALUE;
+ }
+
+ // This function cannot allocate or raise an exception
+ let recv = asm.stack_opnd(0);
+ let ret_opnd = asm.ccall(rb_str_length as *const u8, vec![recv]);
+ asm.stack_pop(1); // Keep recv on stack during ccall for GC
+
+ // Should be guaranteed to be a fixnum on 64-bit systems
+ let out_opnd = asm.stack_push(Type::Fixnum);
+ asm.mov(out_opnd, ret_opnd);
+
+ true
+}
+
fn jit_rb_str_bytesize(
_jit: &mut JITState,
- ctx: &mut Context,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
- _block: Option<IseqPtr>,
+ _block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
- asm.comment("String#bytesize");
+ asm_comment!(asm, "String#bytesize");
+
+ let recv = asm.stack_pop(1);
- let recv = ctx.stack_pop(1);
- let ret_opnd = asm.ccall(rb_str_bytesize as *const u8, vec![recv]);
+ asm_comment!(asm, "get string length");
+ let str_len_opnd = Opnd::mem(
+ std::os::raw::c_long::BITS as u8,
+ asm.load(recv),
+ RUBY_OFFSET_RSTRING_LEN as i32,
+ );
- let out_opnd = ctx.stack_push(Type::Fixnum);
+ let len = asm.load(str_len_opnd);
+ let shifted_val = asm.lshift(len, Opnd::UImm(1));
+ let out_val = asm.or(shifted_val, Opnd::UImm(RUBY_FIXNUM_FLAG as u64));
+
let out_opnd = asm.stack_push(Type::Fixnum); + + asm.mov(out_opnd, out_val); + + true +} + +fn jit_rb_str_byteslice( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if argc != 2 { + return false + } + + // rb_str_byte_substr should be leaf if indexes are fixnums + match (asm.ctx.get_opnd_type(StackOpnd(0)), asm.ctx.get_opnd_type(StackOpnd(1))) { + (Type::Fixnum, Type::Fixnum) => {}, + // Raises when non-integers are passed in, which requires the method frame + // to be pushed for the backtrace + _ => if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(2)) { + return false; + } + } + asm_comment!(asm, "String#byteslice"); + + // rb_str_byte_substr allocates a substring + jit_prepare_call_with_gc(jit, asm); + + // Get stack operands after potential SP change + let len = asm.stack_opnd(0); + let beg = asm.stack_opnd(1); + let recv = asm.stack_opnd(2); + + let ret_opnd = asm.ccall(rb_str_byte_substr as *const u8, vec![recv, beg, len]); + asm.stack_pop(3); + + let out_opnd = asm.stack_push(Type::Unknown); + asm.mov(out_opnd, ret_opnd); + + true +} + +fn jit_rb_str_aref_m( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // In yjit-bench the most common usages by far are single fixnum or two fixnums. + // rb_str_substr should be leaf if indexes are fixnums + if argc == 2 { + match (asm.ctx.get_opnd_type(StackOpnd(0)), asm.ctx.get_opnd_type(StackOpnd(1))) { + (Type::Fixnum, Type::Fixnum) => {}, + // There is a two-argument form of (RegExp, Fixnum) which needs a different c func. + // Other types will raise. + _ => { return false }, + } + } else if argc == 1 { + match asm.ctx.get_opnd_type(StackOpnd(0)) { + Type::Fixnum => {}, + // Besides Fixnum this could also be a Range or a RegExp which are handled by separate c funcs. + // Other types will raise. + _ => { + // If the context doesn't have the type info we try a little harder. + let comptime_arg = jit.peek_at_stack(&asm.ctx, 0); + let arg0 = asm.stack_opnd(0); + if comptime_arg.fixnum_p() { + asm.test(arg0, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + + jit_chain_guard( + JCC_JZ, + jit, + asm, + SEND_MAX_DEPTH, + Counter::guard_send_str_aref_not_fixnum, + ); + } else { + return false + } + }, + } + } else { + return false + } + + asm_comment!(asm, "String#[]"); + + // rb_str_substr allocates a substring + jit_prepare_call_with_gc(jit, asm); + + // Get stack operands after potential SP change + + // The "empty" arg distinguishes between the normal "one arg" behavior + // and the "two arg" special case that returns an empty string + // when the begin index is the length of the string. + // See the usages of rb_str_substr in string.c for more information. + let (beg_idx, empty, len) = if argc == 2 { + (1, Opnd::Imm(1), asm.stack_opnd(0)) + } else { + // If there is only one arg, the length will be 1. 
+ (0, Opnd::Imm(0), VALUE::fixnum_from_usize(1).into()) + }; + + let beg = asm.stack_opnd(beg_idx); + let recv = asm.stack_opnd(beg_idx + 1); + + let ret_opnd = asm.ccall(rb_str_substr_two_fixnums as *const u8, vec![recv, beg, len, empty]); + asm.stack_pop(beg_idx as usize + 2); + + let out_opnd = asm.stack_push(Type::Unknown); + asm.mov(out_opnd, ret_opnd); + + true +} + +fn jit_rb_str_getbyte( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm_comment!(asm, "String#getbyte"); + + // Don't pop since we may bail + let idx = asm.stack_opnd(0); + let recv = asm.stack_opnd(1); + + let comptime_idx = jit.peek_at_stack(&asm.ctx, 0); + if comptime_idx.fixnum_p(){ + jit_guard_known_klass( + jit, + asm, + idx, + idx.into(), + comptime_idx, + SEND_MAX_DEPTH, + Counter::getbyte_idx_not_fixnum, + ); + } else { + return false; + } + + // Untag the index + let idx = asm.rshift(idx, Opnd::UImm(1)); + + // If index is negative, exit + asm.cmp(idx, Opnd::UImm(0)); + asm.jl(Target::side_exit(Counter::getbyte_idx_negative)); + + asm_comment!(asm, "get string length"); + let recv = asm.load(recv); + let str_len_opnd = Opnd::mem( + std::os::raw::c_long::BITS as u8, + asm.load(recv), + RUBY_OFFSET_RSTRING_LEN as i32, + ); + + // Exit if the index is out of bounds + asm.cmp(idx, str_len_opnd); + asm.jge(Target::side_exit(Counter::getbyte_idx_out_of_bounds)); + + let str_ptr = get_string_ptr(asm, recv); + // FIXME: could use SIB indexing here with proper support in backend + let str_ptr = asm.add(str_ptr, idx); + let byte = asm.load(Opnd::mem(8, str_ptr, 0)); + + // Zero-extend the byte to 64 bits + let byte = byte.with_num_bits(64).unwrap(); + let byte = asm.and(byte, 0xFF.into()); + + // Tag the byte + let byte = asm.lshift(byte, Opnd::UImm(1)); + let byte = asm.or(byte, Opnd::UImm(1)); + + asm.stack_pop(2); // Keep them on stack during ccall for GC + let out_opnd = asm.stack_push(Type::Fixnum); + asm.mov(out_opnd, byte); + + true +} + +fn jit_rb_str_setbyte( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Raises when index is out of range. Lazily push a frame in that case. + if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(2)) { + return false; + } + asm_comment!(asm, "String#setbyte"); + + let value = asm.stack_opnd(0); + let index = asm.stack_opnd(1); + let recv = asm.stack_opnd(2); + + let ret_opnd = asm.ccall(rb_str_setbyte as *const u8, vec![recv, index, value]); + asm.stack_pop(3); // Keep them on stack during ccall for GC + + let out_opnd = asm.stack_push(Type::UnknownImm); asm.mov(out_opnd, ret_opnd); true @@ -4139,17 +6152,15 @@ fn jit_rb_str_bytesize( // this situation happens a lot in some workloads. 
fn jit_rb_str_to_s( _jit: &mut JITState, - _ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, - _block: Option<IseqPtr>, + _block: Option<BlockHandler>, _argc: i32, - known_recv_class: *const VALUE, + known_recv_class: Option<VALUE>, ) -> bool { - if !known_recv_class.is_null() && unsafe { *known_recv_class == rb_cString } { - asm.comment("to_s on plain string"); + if unsafe { known_recv_class == Some(rb_cString) } { + asm_comment!(asm, "to_s on plain string"); // The method returns the receiver, which is already on the stack. // No stack movement. return true; @@ -4157,87 +6168,147 @@ fn jit_rb_str_to_s( false } -// Codegen for rb_str_empty() -fn jit_rb_str_empty( - _jit: &mut JITState, - ctx: &mut Context, +fn jit_rb_str_dup( + jit: &mut JITState, asm: &mut Assembler, - _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, - _block: Option<IseqPtr>, + _block: Option<BlockHandler>, _argc: i32, - _known_recv_class: *const VALUE, + known_recv_class: Option<VALUE>, ) -> bool { - const _: () = assert!( - RUBY_OFFSET_RSTRING_AS_HEAP_LEN == RUBY_OFFSET_RSTRING_EMBED_LEN, - "same offset to len embedded or not so we can use one code path to read the length", - ); + // We specialize only the BARE_STRING_P case. Otherwise it's not leaf. + if unsafe { known_recv_class != Some(rb_cString) } { + return false; + } + asm_comment!(asm, "String#dup"); + + jit_prepare_call_with_gc(jit, asm); - let recv_opnd = ctx.stack_pop(1); - let out_opnd = ctx.stack_push(Type::UnknownImm); + let recv_opnd = asm.stack_opnd(0); + let recv_opnd = asm.load(recv_opnd); + let shape_id_offset = unsafe { rb_shape_id_offset() }; + let shape_opnd = Opnd::mem(64, recv_opnd, shape_id_offset); + asm.test(shape_opnd, Opnd::UImm(SHAPE_ID_HAS_IVAR_MASK as u64)); + asm.jnz(Target::side_exit(Counter::send_str_dup_exivar)); + + // Call rb_str_dup + let ret_opnd = asm.ccall(rb_str_dup as *const u8, vec![recv_opnd]); + + asm.stack_pop(1); + let stack_ret = asm.stack_push(Type::CString); + asm.mov(stack_ret, ret_opnd); + + true +} + +// Codegen for rb_str_empty_p() +fn jit_rb_str_empty_p( + _jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + let recv_opnd = asm.stack_pop(1); + + asm_comment!(asm, "get string length"); let str_len_opnd = Opnd::mem( - (8 * size_of::<std::os::raw::c_long>()) as u8, + std::os::raw::c_long::BITS as u8, asm.load(recv_opnd), - RUBY_OFFSET_RSTRING_AS_HEAP_LEN as i32, + RUBY_OFFSET_RSTRING_LEN as i32, ); asm.cmp(str_len_opnd, Opnd::UImm(0)); let string_empty = asm.csel_e(Qtrue.into(), Qfalse.into()); + let out_opnd = asm.stack_push(Type::UnknownImm); asm.mov(out_opnd, string_empty); return true; } +// Codegen for rb_str_concat() with an integer argument -- *not* String#concat +// Using strings as a byte buffer often includes appending byte values to the end of the string. +fn jit_rb_str_concat_codepoint( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm_comment!(asm, "String#<< with codepoint argument"); + + // Either of the string concatenation functions we call will reallocate the string to grow its + // capacity if necessary. 
In extremely rare cases (i.e., string exceeds `LONG_MAX` bytes),
+ // either of the called functions will raise an exception.
+ jit_prepare_non_leaf_call(jit, asm);
+
+ let codepoint = asm.stack_opnd(0);
+ let recv = asm.stack_opnd(1);
+
+ guard_object_is_fixnum(jit, asm, codepoint, StackOpnd(0));
+
+ asm.ccall(rb_jit_str_concat_codepoint as *const u8, vec![recv, codepoint]);
+
+ // The receiver is the return value, so we only need to pop the codepoint argument off the stack.
+ // We can reuse the receiver slot in the stack as the return value.
+ asm.stack_pop(1);
+
+ true
+}
+
// Codegen for rb_str_concat() -- *not* String#concat
// Frequently strings are concatenated using "out_str << next_str".
// This is common in Erb and similar templating languages.
fn jit_rb_str_concat(
jit: &mut JITState,
- ctx: &mut Context,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
- _ci: *const rb_callinfo,
- _cme: *const rb_callable_method_entry_t,
- _block: Option<IseqPtr>,
- _argc: i32,
- _known_recv_class: *const VALUE,
+ ci: *const rb_callinfo,
+ cme: *const rb_callable_method_entry_t,
+ block: Option<BlockHandler>,
+ argc: i32,
+ known_recv_class: Option<VALUE>,
) -> bool {
// The << operator can accept integer codepoints for characters
// as the argument. We only specially optimise string arguments.
// If the peeked-at compile time argument is something other than
// a string, assume it won't be a string later either.
- let comptime_arg = jit_peek_at_stack(jit, ctx, 0);
+ let comptime_arg = jit.peek_at_stack(&asm.ctx, 0);
+ if unsafe { RB_TYPE_P(comptime_arg, RUBY_T_FIXNUM) } {
+ return jit_rb_str_concat_codepoint(jit, asm, ci, cme, block, argc, known_recv_class);
+ }
+
if ! unsafe { RB_TYPE_P(comptime_arg, RUBY_T_STRING) } {
return false;
}
- // Generate a side exit
- let side_exit = get_side_exit(jit, ocb, ctx);
+ // Guard that the concat argument is a string
+ guard_object_is_string(asm, asm.stack_opnd(0), StackOpnd(0), Counter::guard_send_not_string);
- // Guard that the argument is of class String at runtime.
- let arg_type = ctx.get_opnd_type(StackOpnd(0));
+ // Guard buffers from GC since rb_str_buf_append may allocate.
+ // rb_str_buf_append may raise Encoding::CompatibilityError, but we accept compromised
+ // backtraces on this method since the interpreter does the same thing on opt_ltlt.
+ jit_prepare_non_leaf_call(jit, asm);
- let concat_arg = ctx.stack_pop(1);
- let recv = ctx.stack_pop(1);
+ // Explicitly spill temps before making any C calls. `ccall` will spill temps, but it does a
+ // check to only spill if it thinks it's necessary. That logic can't see through the runtime
+ // branching occurring in the code generated for this function. Consequently, the branch for
+ // the first `ccall` will spill registers but the second one will not. At run time, we may
+ // jump over that spill code when executing the second branch, leading to situations that are
+ // quite hard to debug. If we spill up front we avoid diverging behavior.
+ asm.spill_regs(); - // If we're not compile-time certain that this will always be a string, guard at runtime - if arg_type != Type::CString && arg_type != Type::TString { - let arg_opnd = asm.load(concat_arg); - if !arg_type.is_heap() { - asm.comment("guard arg not immediate"); - asm.test(arg_opnd, (RUBY_IMMEDIATE_MASK as u64).into()); - asm.jnz(side_exit); - asm.cmp(arg_opnd, Qfalse.into()); - asm.je(side_exit); - } - guard_object_is_string(asm, arg_opnd, side_exit); - } + let concat_arg = asm.stack_pop(1); + let recv = asm.stack_pop(1); // Test if string encodings differ. If different, use rb_str_append. If the same, // use rb_yjit_str_simple_append, which calls rb_str_cat. - asm.comment("<< on strings"); + asm_comment!(asm, "<< on strings"); // Take receiver's object flags XOR arg's flags. If any // string-encoding flags are different between the two, @@ -4250,21 +6321,22 @@ fn jit_rb_str_concat( ); asm.test(flags_xor, Opnd::UImm(RUBY_ENCODING_MASK as u64)); - // Push once, use the resulting operand in both branches below. - let stack_ret = ctx.stack_push(Type::CString); - let enc_mismatch = asm.new_label("enc_mismatch"); asm.jnz(enc_mismatch); // If encodings match, call the simple append function and jump to return let ret_opnd = asm.ccall(rb_yjit_str_simple_append as *const u8, vec![recv, concat_arg]); let ret_label = asm.new_label("func_return"); + let stack_ret = asm.stack_push(Type::TString); asm.mov(stack_ret, ret_opnd); + asm.stack_pop(1); // forget stack_ret to re-push after ccall asm.jmp(ret_label); // If encodings are different, use a slower encoding-aware concatenate asm.write_label(enc_mismatch); + asm.spill_regs(); // Ignore the register for the other local branch let ret_opnd = asm.ccall(rb_str_buf_append as *const u8, vec![recv, concat_arg]); + let stack_ret = asm.stack_push(Type::TString); asm.mov(stack_ret, ret_opnd); // Drop through to return @@ -4273,30 +6345,120 @@ fn jit_rb_str_concat( true } +// Codegen for rb_ary_empty_p() +fn jit_rb_ary_empty_p( + _jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + let array_opnd = asm.stack_pop(1); + let array_reg = asm.load(array_opnd); + let len_opnd = get_array_len(asm, array_reg); + + asm.test(len_opnd, len_opnd); + let bool_val = asm.csel_z(Qtrue.into(), Qfalse.into()); + + let out_opnd = asm.stack_push(Type::UnknownImm); + asm.store(out_opnd, bool_val); + + return true; +} + +// Codegen for rb_ary_length() +fn jit_rb_ary_length( + _jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + let array_opnd = asm.stack_pop(1); + let array_reg = asm.load(array_opnd); + let len_opnd = get_array_len(asm, array_reg); + + // Convert the length to a fixnum + let shifted_val = asm.lshift(len_opnd, Opnd::UImm(1)); + let out_val = asm.or(shifted_val, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + + let out_opnd = asm.stack_push(Type::Fixnum); + asm.store(out_opnd, out_val); + + return true; +} + +fn jit_rb_ary_push( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm_comment!(asm, "Array#<<"); + + // rb_ary_push allocates memory for buffer extension and can raise 
FrozenError + // Not using a lazy frame here since the interpreter also has a truncated + // stack trace from opt_ltlt. + jit_prepare_non_leaf_call(jit, asm); + + let item_opnd = asm.stack_opnd(0); + let ary_opnd = asm.stack_opnd(1); + let ret = asm.ccall(rb_ary_push as *const u8, vec![ary_opnd, item_opnd]); + asm.stack_pop(2); // Keep them on stack during ccall for GC + + let ret_opnd = asm.stack_push(Type::TArray); + asm.mov(ret_opnd, ret); + true +} + +// Just a leaf method, but not using `Primitive.attr! :leaf` since BOP methods can't use it. +fn jit_rb_hash_empty_p( + _jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm_comment!(asm, "Hash#empty?"); + + let hash_opnd = asm.stack_pop(1); + let ret = asm.ccall(rb_hash_empty_p as *const u8, vec![hash_opnd]); + + let ret_opnd = asm.stack_push(Type::UnknownImm); + asm.mov(ret_opnd, ret); + true +} + fn jit_obj_respond_to( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, - _block: Option<IseqPtr>, + _block: Option<BlockHandler>, argc: i32, - known_recv_class: *const VALUE, + known_recv_class: Option<VALUE>, ) -> bool { // respond_to(:sym) or respond_to(:sym, true) if argc != 1 && argc != 2 { return false; } - if known_recv_class.is_null() { - return false; - } - - let recv_class = unsafe { *known_recv_class }; + let recv_class = match known_recv_class { + Some(class) => class, + None => return false, + }; // Get the method_id from compile time. We will later add a guard against it. - let mid_sym = jit_peek_at_stack(jit, ctx, (argc - 1) as isize); + let mid_sym = jit.peek_at_stack(&asm.ctx, (argc - 1) as isize); if !mid_sym.static_sym_p() { return false } @@ -4308,7 +6470,7 @@ fn jit_obj_respond_to( Some(false) } else { // Get value from type information (may or may not be known) - ctx.get_opnd_type(StackOpnd(0)).known_truthy() + asm.ctx.get_opnd_type(StackOpnd(0)).known_truthy() }; let target_cme = unsafe { rb_callable_method_entry_or_negative(recv_class, mid) }; @@ -4329,83 +6491,223 @@ fn jit_obj_respond_to( }; let result = match (visibility, allow_priv) { - (METHOD_VISI_UNDEF, _) => Qfalse, // No method => false - (METHOD_VISI_PUBLIC, _) => Qtrue, // Public method => true regardless of include_all - (_, Some(true)) => Qtrue, // include_all => always true + (METHOD_VISI_UNDEF, _) => { + // No method, we can return false given respond_to_missing? hasn't been overridden. + // In the future, we might want to jit the call to respond_to_missing? + if !assume_method_basic_definition(jit, asm, recv_class, ID!(respond_to_missing)) { + return false; + } + Qfalse + } + (METHOD_VISI_PUBLIC, _) | // Public method => fine regardless of include_all + (_, Some(true)) => { // include_all => all visibility are acceptable + // Method exists and has acceptable visibility + if cme_def_type == VM_METHOD_TYPE_NOTIMPLEMENTED { + // C method with rb_f_notimplement(). `respond_to?` returns false + // without consulting `respond_to_missing?`. See also: rb_add_method_cfunc() + Qfalse + } else { + Qtrue + } + } (_, _) => return false // not public and include_all not known, can't compile }; - if result != Qtrue { - // Only if respond_to_missing? hasn't been overridden - // In the future, we might want to jit the call to respond_to_missing? 
- if !assume_method_basic_definition(jit, ocb, recv_class, idRespond_to_missing.into()) { - return false; - } - } - // Invalidate this block if method lookup changes for the method being queried. This works // both for the case where a method does or does not exist, as for the latter we asked for a // "negative CME" earlier. - assume_method_lookup_stable(jit, ocb, target_cme); - - // Generate a side exit - let side_exit = get_side_exit(jit, ocb, ctx); + jit.assume_method_lookup_stable(asm, target_cme); if argc == 2 { // pop include_all argument (we only use its type info) - ctx.stack_pop(1); + asm.stack_pop(1); } - let sym_opnd = ctx.stack_pop(1); - let _recv_opnd = ctx.stack_pop(1); + let sym_opnd = asm.stack_pop(1); + let _recv_opnd = asm.stack_pop(1); // This is necessary because we have no guarantee that sym_opnd is a constant - asm.comment("guard known mid"); + asm_comment!(asm, "guard known mid"); asm.cmp(sym_opnd, mid_sym.into()); - asm.jne(side_exit); + jit_chain_guard( + JCC_JNE, + jit, + asm, + SEND_MAX_DEPTH, + Counter::guard_send_respond_to_mid_mismatch, + ); + + jit_putobject(asm, result); + + true +} + +fn jit_rb_f_block_given_p( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm.stack_pop(1); + let out_opnd = asm.stack_push(Type::UnknownImm); + + gen_block_given(jit, asm, out_opnd, Qtrue.into(), Qfalse.into()); + + true +} + +/// Codegen for `block_given?` and `defined?(yield)` +fn gen_block_given( + jit: &mut JITState, + asm: &mut Assembler, + out_opnd: Opnd, + true_opnd: Opnd, + false_opnd: Opnd, +) { + asm_comment!(asm, "block_given?"); + + // `yield` goes to the block handler stowed in the "local" iseq which is + // the current iseq or a parent. Only the "method" iseq type can be passed a + // block handler. (e.g. `yield` in the top level script is a syntax error.) 
+ let local_iseq = unsafe { rb_get_iseq_body_local_iseq(jit.iseq) }; + if unsafe { rb_get_iseq_body_type(local_iseq) } == ISEQ_TYPE_METHOD { + // Same as rb_vm_frame_block_handler + let ep_opnd = gen_get_lep(jit, asm); + let block_handler = asm.load( + Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) + ); + + // Return `block_handler != VM_BLOCK_HANDLER_NONE` + asm.cmp(block_handler, VM_BLOCK_HANDLER_NONE.into()); + let block_given = asm.csel_ne(true_opnd, false_opnd); + asm.mov(out_opnd, block_given); + } else { + asm.mov(out_opnd, false_opnd); + } +} + +// Codegen for rb_class_superclass() +fn jit_rb_class_superclass( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + cme: *const rb_callable_method_entry_t, + _block: Option<crate::codegen::BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + extern "C" { + fn rb_class_superclass(klass: VALUE) -> VALUE; + } + + // It may raise "uninitialized class" + if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(0)) { + return false; + } + + asm_comment!(asm, "Class#superclass"); + let recv_opnd = asm.stack_opnd(0); + let ret = asm.ccall(rb_class_superclass as *const u8, vec![recv_opnd]); - jit_putobject(jit, ctx, asm, result); + asm.stack_pop(1); + let ret_opnd = asm.stack_push(Type::Unknown); + asm.mov(ret_opnd, ret); + + true +} + +fn jit_rb_case_equal( + jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + known_recv_class: Option<VALUE>, +) -> bool { + if !jit.assume_expected_cfunc(asm, known_recv_class.unwrap(), ID!(eq), rb_obj_equal as _) { + return false; + } + + asm_comment!(asm, "case_equal: {}#===", get_class_name(known_recv_class)); + + // Compare the arguments + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); + asm.cmp(arg0, arg1); + let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into()); + + let stack_ret = asm.stack_push(Type::UnknownImm); + asm.mov(stack_ret, ret_opnd); true } fn jit_thread_s_current( _jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, - _block: Option<IseqPtr>, + _block: Option<BlockHandler>, _argc: i32, - _known_recv_class: *const VALUE, + _known_recv_class: Option<VALUE>, ) -> bool { - asm.comment("Thread.current"); - ctx.stack_pop(1); + asm_comment!(asm, "Thread.current"); + asm.stack_pop(1); // ec->thread_ptr - let ec_thread_opnd = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_THREAD_PTR)); + let ec_thread_opnd = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_THREAD_PTR as i32)); // thread->self let thread_self = Opnd::mem(64, ec_thread_opnd, RUBY_OFFSET_THREAD_SELF); - let stack_ret = ctx.stack_push(Type::UnknownHeap); + let stack_ret = asm.stack_push(Type::UnknownHeap); asm.mov(stack_ret, thread_self); true } -// Check if we know how to codegen for a particular cfunc method +/// Specialization for rb_obj_dup() (Kernel#dup) +fn jit_rb_obj_dup( + _jit: &mut JITState, + asm: &mut Assembler, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Kernel#dup has arity=0, and caller already did argument count check. + let self_type = asm.ctx.get_opnd_type(StackOpnd(0)); + + if self_type.is_imm() { + // Method is no-op when receiver is an immediate value. 
+ true + } else { + false + } +} + +/// Check if we know how to codegen for a particular cfunc method +/// See also: [reg_method_codegen]. fn lookup_cfunc_codegen(def: *const rb_method_definition_t) -> Option<MethodGenFn> { let method_serial = unsafe { get_def_method_serial(def) }; + let table = unsafe { METHOD_CODEGEN_TABLE.as_ref().unwrap() }; - CodegenGlobals::look_up_codegen_method(method_serial) + let option_ref = table.get(&method_serial); + match option_ref { + None => None, + Some(&mgf) => Some(mgf), // Deref + } } // Is anyone listening for :c_call and :c_return event currently? fn c_method_tracing_currently_enabled(jit: &JITState) -> bool { // Defer to C implementation in yjit.c unsafe { - rb_c_method_tracing_currently_enabled(jit.ec.unwrap() as *mut rb_execution_context_struct) + rb_c_method_tracing_currently_enabled(jit.ec) } } @@ -4430,13 +6732,25 @@ unsafe extern "C" fn build_kwhash(ci: *const rb_callinfo, sp: *const VALUE) -> V // at sp[-2]. Depending on the frame type, it can serve different purposes, // which are covered here by enum variants. enum SpecVal { - None, - BlockISeq(IseqPtr), - BlockParamProxy, + BlockHandler(Option<BlockHandler>), PrevEP(*const VALUE), PrevEPOpnd(Opnd), } +// Each variant represents a branch in vm_caller_setup_arg_block. +#[derive(Clone, Copy)] +pub enum BlockHandler { + // send, invokesuper: blockiseq operand + BlockISeq(IseqPtr), + // invokesuper: GET_BLOCK_HANDLER() (GET_LEP()[VM_ENV_DATA_INDEX_SPECVAL]) + LEPSpecVal, + // part of the allocate-free block forwarding scheme + BlockParamProxy, + // To avoid holding the block arg (e.g. proc and symbol) across C calls, + // we might need to set the block handler early in the call sequence + AlreadySet, +} + struct ControlFrame { recv: Opnd, sp: Opnd, @@ -4445,7 +6759,6 @@ struct ControlFrame { frame_type: u32, specval: SpecVal, cme: *const rb_callable_method_entry_t, - local_size: i32 } // Codegen performing a similar (but not identical) function to vm_push_frame @@ -4460,21 +6773,17 @@ struct ControlFrame { // * Provided sp should point to the new frame's sp, immediately following locals and the environment // * At entry, CFP points to the caller (not callee) frame // * At exit, ec->cfp is updated to the pushed CFP -// * CFP and SP registers are updated only if set_sp_cfp is set +// * SP register is updated only if frame.iseq is set // * Stack overflow is not checked (should be done by the caller) // * Interrupts are not checked (should be done by the caller) fn gen_push_frame( jit: &mut JITState, - _ctx: &mut Context, asm: &mut Assembler, - set_sp_cfp: bool, // if true CFP and SP will be switched to the callee frame: ControlFrame, ) { - assert!(frame.local_size >= 0); - let sp = frame.sp; - asm.comment("push cme, specval, frame type"); + asm_comment!(asm, "push cme, specval, frame type"); // Write method entry at sp[-3] // sp[-3] = me; @@ -4486,27 +6795,31 @@ fn gen_push_frame( // the outer environment depending on the frame type. // sp[-2] = specval; let specval: Opnd = match frame.specval { - SpecVal::None => { - VM_BLOCK_HANDLER_NONE.into() - } - SpecVal::BlockISeq(block_iseq) => { - // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). - // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases - // with cfp->block_code. 
- asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE::from(block_iseq).into()); - - let cfp_self = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); - asm.or(cfp_self, Opnd::Imm(1)) - } - SpecVal::BlockParamProxy => { - let ep_opnd = gen_get_lep(jit, asm); - let block_handler = asm.load( - Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) - ); - - asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), block_handler); - - block_handler + SpecVal::BlockHandler(None) => VM_BLOCK_HANDLER_NONE.into(), + SpecVal::BlockHandler(Some(block_handler)) => { + match block_handler { + BlockHandler::BlockISeq(block_iseq) => { + // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). + // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases + // with cfp->block_code. + asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE::from(block_iseq).into()); + + let cfp_self = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); + asm.or(cfp_self, Opnd::Imm(1)) + } + BlockHandler::LEPSpecVal => { + let lep_opnd = gen_get_lep(jit, asm); + asm.load(Opnd::mem(64, lep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)) + } + BlockHandler::BlockParamProxy => { + let ep_opnd = gen_get_lep(jit, asm); + let block_handler = asm.load( + Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) + ); + block_handler + } + BlockHandler::AlreadySet => 0.into(), // unused + } } SpecVal::PrevEP(prev_ep) => { let tagged_prev_ep = (prev_ep as usize) | 1; @@ -4514,9 +6827,13 @@ fn gen_push_frame( } SpecVal::PrevEPOpnd(ep_opnd) => { asm.or(ep_opnd, 1.into()) - }, + } }; - asm.store(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -2), specval); + if let SpecVal::BlockHandler(Some(BlockHandler::AlreadySet)) = frame.specval { + asm_comment!(asm, "specval should have been set"); + } else { + asm.store(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -2), specval); + } // Write env flags at sp[-1] // sp[-1] = frame_type; @@ -4535,16 +6852,14 @@ fn gen_push_frame( // .self = recv, // .ep = <sp - 1>, // .block_code = 0, - // .__bp__ = sp, // }; - asm.comment("push callee control frame"); + asm_comment!(asm, "push callee control frame"); // For an iseq call PC may be None, in which case we will not set PC and will allow jitted code // to set it as necessary. - let _pc = if let Some(pc) = frame.pc { + if let Some(pc) = frame.pc { asm.mov(cfp_opnd(RUBY_OFFSET_CFP_PC), pc.into()); }; - asm.mov(cfp_opnd(RUBY_OFFSET_CFP_BP), sp); asm.mov(cfp_opnd(RUBY_OFFSET_CFP_SP), sp); let iseq: Opnd = if let Some(iseq) = frame.iseq { VALUE::from(iseq).into() @@ -4555,89 +6870,36 @@ fn gen_push_frame( asm.mov(cfp_opnd(RUBY_OFFSET_CFP_SELF), frame.recv); asm.mov(cfp_opnd(RUBY_OFFSET_CFP_BLOCK_CODE), 0.into()); - // This Qnil fill snippet potentially requires 2 more registers on Arm, one for Qnil and - // another for calculating the address in case there are a lot of local variables. So doing - // this after releasing the register for specval and the receiver to avoid register spill. 
- let num_locals = frame.local_size; - if num_locals > 0 { - asm.comment("initialize locals"); - - // Initialize local variables to Qnil - for i in 0..num_locals { - let offs = SIZEOF_VALUE_I32 * (i - num_locals - 3); - asm.store(Opnd::mem(64, sp, offs), Qnil.into()); - } - } - - if set_sp_cfp { - // Saving SP before calculating ep avoids a dependency on a register - // However this must be done after referencing frame.recv, which may be SP-relative - asm.mov(SP, sp); - } let ep = asm.sub(sp, SIZEOF_VALUE.into()); asm.mov(cfp_opnd(RUBY_OFFSET_CFP_EP), ep); - - asm.comment("switch to new CFP"); - let new_cfp = asm.lea(cfp_opnd(0)); - if set_sp_cfp { - asm.mov(CFP, new_cfp); - asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); - } else { - asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), new_cfp); - } } fn gen_send_cfunc( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, - block: Option<IseqPtr>, - recv_known_klass: *const VALUE, + block: Option<BlockHandler>, + recv_known_class: Option<VALUE>, flags: u32, argc: i32, -) -> CodegenStatus { +) -> Option<CodegenStatus> { let cfunc = unsafe { get_cme_def_body_cfunc(cme) }; let cfunc_argc = unsafe { get_mct_argc(cfunc) }; let mut argc = argc; - // Create a side-exit to fall back to the interpreter - let side_exit = get_side_exit(jit, ocb, ctx); - - // If the function expects a Ruby array of arguments - if cfunc_argc < 0 && cfunc_argc != -1 { - gen_counter_incr!(asm, send_cfunc_ruby_array_varg); - return CantCompile; - } - - // We aren't handling a vararg cfuncs with splat currently. - if flags & VM_CALL_ARGS_SPLAT != 0 && cfunc_argc == -1 { - gen_counter_incr!(asm, send_args_splat_cfunc_var_args); - return CantCompile; - } + // Splat call to a C method that takes `VALUE *` and `len` + let variable_splat = flags & VM_CALL_ARGS_SPLAT != 0 && cfunc_argc == -1; + let block_arg = flags & VM_CALL_ARGS_BLOCKARG != 0; - if flags & VM_CALL_ARGS_SPLAT != 0 && flags & VM_CALL_ZSUPER != 0 { - // zsuper methods are super calls without any arguments. - // They are also marked as splat, but don't actually have an array - // they pull arguments from, instead we need to change to call - // a different method with the current stack. - gen_counter_incr!(asm, send_args_splat_cfunc_zuper); - return CantCompile; + // If it's a splat and the method expects a Ruby array of arguments + if cfunc_argc == -2 && flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(jit, asm, Counter::send_cfunc_splat_neg2); + return None; } - // In order to handle backwards compatibility between ruby 3 and 2 - // ruby2_keywords was introduced. It is called only on methods - // with splat and changes they way they handle them. - // We are just going to not compile these. 
- // https://docs.ruby-lang.org/en/3.2/Module.html#method-i-ruby2_keywords - if unsafe { - get_iseq_flags_ruby2_keywords(jit.iseq) && flags & VM_CALL_ARGS_SPLAT != 0 - } { - gen_counter_incr!(asm, send_args_splat_cfunc_ruby2_keywords); - return CantCompile; - } + exit_if_kwsplat_non_nil(jit, asm, flags, Counter::send_cfunc_kw_splat_non_nil)?; + let kw_splat = flags & VM_CALL_KW_SPLAT != 0; let kw_arg = unsafe { vm_ci_kwarg(ci) }; let kw_arg_num = if kw_arg.is_null() { @@ -4647,39 +6909,79 @@ fn gen_send_cfunc( }; if kw_arg_num != 0 && flags & VM_CALL_ARGS_SPLAT != 0 { - gen_counter_incr!(asm, send_cfunc_splat_with_kw); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_cfunc_splat_with_kw); + return None; } if c_method_tracing_currently_enabled(jit) { // Don't JIT if tracing c_call or c_return - gen_counter_incr!(asm, send_cfunc_tracing); - return CantCompile; - } + gen_counter_incr(jit, asm, Counter::send_cfunc_tracing); + return None; + } + + // Increment total cfunc send count + gen_counter_incr(jit, asm, Counter::num_send_cfunc); + + // Delegate to codegen for C methods if we have it and the callsite is simple enough. + if kw_arg.is_null() && + !kw_splat && + flags & VM_CALL_OPT_SEND == 0 && + flags & VM_CALL_ARGS_SPLAT == 0 && + flags & VM_CALL_ARGS_BLOCKARG == 0 && + (cfunc_argc == -1 || argc == cfunc_argc) { + let expected_stack_after = asm.ctx.get_stack_size() as i32 - argc; + if let Some(known_cfunc_codegen) = lookup_cfunc_codegen(unsafe { (*cme).def }) { + // We don't push a frame for specialized cfunc codegen, so the generated code must be leaf. + // However, the interpreter doesn't push a frame on opt_* instruction either, so we allow + // non-sendish instructions to break this rule as an exception. + let cfunc_codegen = if jit.is_sendish() { + asm.with_leaf_ccall(|asm| + perf_call!("gen_send_cfunc: ", known_cfunc_codegen(jit, asm, ci, cme, block, argc, recv_known_class)) + ) + } else { + perf_call!("gen_send_cfunc: ", known_cfunc_codegen(jit, asm, ci, cme, block, argc, recv_known_class)) + }; - // Delegate to codegen for C methods if we have it. - if kw_arg.is_null() && flags & VM_CALL_OPT_SEND == 0 { - let codegen_p = lookup_cfunc_codegen(unsafe { (*cme).def }); - if let Some(known_cfunc_codegen) = codegen_p { - if known_cfunc_codegen(jit, ctx, asm, ocb, ci, cme, block, argc, recv_known_klass) { + if cfunc_codegen { + assert_eq!(expected_stack_after, asm.ctx.get_stack_size() as i32); + gen_counter_incr(jit, asm, Counter::num_send_cfunc_inline); // cfunc codegen generated code. Terminate the block so // there isn't multiple calls in the same block. 
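// A minimal standalone sketch (toy helper, not YJIT code) of the invariant the
// assert above enforces: every specialized cfunc codegen reached through
// lookup_cfunc_codegen() must pop the receiver plus argc arguments and push
// exactly one result, so the net stack effect is always -argc.
fn toy_cfunc_stack_effect_ok(stack_before: i32, stack_after: i32, argc: i32) -> bool {
    // pop argc args + receiver, push 1 return value => net change of -argc
    stack_after == stack_before - argc
}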
- jump_to_next_insn(jit, ctx, asm, ocb); - return EndBlock; + return jump_to_next_insn(jit, asm); } } } // Check for interrupts - gen_check_ints(asm, side_exit); + gen_check_ints(asm, Counter::guard_send_interrupted); // Stack overflow check // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin) // REG_CFP <= REG_SP + 4 * SIZEOF_VALUE + sizeof(rb_control_frame_t) - asm.comment("stack overflow check"); - let stack_limit = asm.lea(ctx.sp_opnd((SIZEOF_VALUE * 4 + 2 * RUBY_SIZEOF_CONTROL_FRAME) as isize)); + asm_comment!(asm, "stack overflow check"); + const _: () = assert!(RUBY_SIZEOF_CONTROL_FRAME % SIZEOF_VALUE == 0, "sizeof(rb_control_frame_t) is a multiple of sizeof(VALUE)"); + let stack_limit = asm.lea(asm.ctx.sp_opnd((4 + 2 * (RUBY_SIZEOF_CONTROL_FRAME / SIZEOF_VALUE)) as i32)); asm.cmp(CFP, stack_limit); - asm.jbe(counted_exit!(ocb, side_exit, send_se_cf_overflow)); + asm.jbe(Target::side_exit(Counter::guard_send_se_cf_overflow)); + + // Guard for variable length splat call before any modifications to the stack + if variable_splat { + let splat_array_idx = i32::from(kw_splat) + i32::from(block_arg); + let comptime_splat_array = jit.peek_at_stack(&asm.ctx, splat_array_idx as isize); + if unsafe { rb_yjit_ruby2_keywords_splat_p(comptime_splat_array) } != 0 { + gen_counter_incr(jit, asm, Counter::send_cfunc_splat_varg_ruby2_keywords); + return None; + } + + let splat_array = asm.stack_opnd(splat_array_idx); + guard_object_is_array(asm, splat_array, splat_array.into(), Counter::guard_send_splat_not_array); + + asm_comment!(asm, "guard variable length splat call servicable"); + let sp = asm.ctx.sp_opnd(0); + let proceed = asm.ccall(rb_yjit_splat_varg_checks as _, vec![sp, splat_array, CFP]); + asm.cmp(proceed, Qfalse.into()); + asm.je(Target::side_exit(Counter::guard_send_cfunc_bad_splat_vargs)); + } // Number of args which will be passed through to the callee // This is adjusted by the kwargs being combined into a hash. 
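// A worked example of the adjustment described above, as a toy helper (not
// YJIT code): "combined into a hash" refers to build_kwhash() further down,
// which collapses the kwargs into a single trailing Hash argument. For
// `recv.m(1, 2, k: 3)` we have argc = 3 and kw_arg_num = 1, so the callee sees
// 3 - 1 + 1 = 3 values: 1, 2 and {k: 3}. The kw_arg_num == 0 branch mirrors
// the kw_arg.is_null() case.
fn toy_passed_argc(argc: i32, kw_arg_num: i32) -> i32 {
    if kw_arg_num == 0 { argc } else { argc - kw_arg_num + 1 }
}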
@@ -4689,93 +6991,116 @@ fn gen_send_cfunc( argc - kw_arg_num + 1 }; + // Exclude the kw_splat hash from arity check + if kw_splat { + passed_argc -= 1; + } // If the argument count doesn't match if cfunc_argc >= 0 && cfunc_argc != passed_argc && flags & VM_CALL_ARGS_SPLAT == 0 { - gen_counter_incr!(asm, send_cfunc_argc_mismatch); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_cfunc_argc_mismatch); + return None; } // Don't JIT functions that need C stack arguments for now if cfunc_argc >= 0 && passed_argc + 1 > (C_ARG_OPNDS.len() as i32) { - gen_counter_incr!(asm, send_cfunc_toomany_args); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_cfunc_toomany_args); + return None; } - let block_arg = flags & VM_CALL_ARGS_BLOCKARG != 0; - let block_arg_type = if block_arg { - Some(ctx.get_opnd_type(StackOpnd(0))) + let mut block_arg_type = if block_arg { + Some(asm.ctx.get_opnd_type(StackOpnd(0))) } else { None }; match block_arg_type { Some(Type::Nil | Type::BlockParamProxy) => { - // We'll handle this later + // We don't need the actual stack value for these + asm.stack_pop(1); } - None => { - // Nothing to do - } - _ => { - gen_counter_incr!(asm, send_block_arg); - return CantCompile; - } - } - - match block_arg_type { - Some(Type::Nil) => { - // We have a nil block arg, so let's pop it off the args - ctx.stack_pop(1); - } - Some(Type::BlockParamProxy) => { - // We don't need the actual stack value - ctx.stack_pop(1); + Some(Type::Unknown | Type::UnknownImm) if jit.peek_at_stack(&asm.ctx, 0).nil_p() => { + // The sample blockarg is nil, so speculate that's the case. + asm.cmp(asm.stack_opnd(0), Qnil.into()); + asm.jne(Target::side_exit(Counter::guard_send_cfunc_block_not_nil)); + block_arg_type = Some(Type::Nil); + asm.stack_pop(1); } None => { // Nothing to do } _ => { - assert!(false); + gen_counter_incr(jit, asm, Counter::send_cfunc_block_arg); + return None; } } + let block_arg_type = block_arg_type; // drop `mut` - // This is a .send call and we need to adjust the stack - if flags & VM_CALL_OPT_SEND != 0 { - handle_opt_send_shift_stack(asm, argc, ctx); + // Pop the empty kw_splat hash + if kw_splat { + // Only `**nil` is supported right now. Checked in exit_if_kwsplat_non_nil() + assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0))); + asm.stack_pop(1); + argc -= 1; } - // push_splat_args does stack manipulation so we can no longer side exit - if flags & VM_CALL_ARGS_SPLAT != 0 { + // Splat handling when C method takes a static number of arguments. + // push_splat_args() does stack manipulation so we can no longer side exit + if flags & VM_CALL_ARGS_SPLAT != 0 && cfunc_argc >= 0 { let required_args : u32 = (cfunc_argc as u32).saturating_sub(argc as u32 - 1); // + 1 because we pass self if required_args + 1 >= C_ARG_OPNDS.len() as u32 { - gen_counter_incr!(asm, send_cfunc_toomany_args); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_cfunc_toomany_args); + return None; } + // We are going to assume that the splat fills - // all the remaining arguments. In the generated code - // we test if this is true and if not side exit. - argc = required_args as i32; + // all the remaining arguments. So the number of args + // should just equal the number of args the cfunc takes. + // In the generated code we test if this is true + // and if not side exit. 
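// A toy helper (not YJIT code) restating the assumption above: with a
// fixed-arity cfunc, the splat must supply exactly the parameters not already
// given positionally, i.e. cfunc_argc - (argc - 1), where the -1 drops the
// splat array itself from the count. For example, a two-argument cfunc called
// as `recv.m(0, *args)` (argc = 2) needs a one-element splat; the generated
// length guard side-exits otherwise.
fn toy_splat_required_args(cfunc_argc: u32, argc: u32) -> u32 {
    cfunc_argc.saturating_sub(argc - 1)
}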
+ argc = cfunc_argc; passed_argc = argc; - push_splat_args(required_args, ctx, asm, ocb, side_exit) + push_splat_args(required_args, asm) } + // This is a .send call and we need to adjust the stack + if flags & VM_CALL_OPT_SEND != 0 { + handle_opt_send_shift_stack(asm, argc); + } + + // Push a dynamic number of items from the splat array to the stack when calling a vargs method + let dynamic_splat_size = if variable_splat { + asm_comment!(asm, "variable length splat"); + let stack_splat_array = asm.lea(asm.stack_opnd(0)); + Some(asm.ccall(rb_yjit_splat_varg_cfunc as _, vec![stack_splat_array])) + } else { + None + }; + // Points to the receiver operand on the stack - let recv = ctx.stack_opnd(argc); + let recv = asm.stack_opnd(argc); // Store incremented PC into current control frame in case callee raises. jit_save_pc(jit, asm); - // Increment the stack pointer by 3 (in the callee) - // sp += 3 - let sp = asm.lea(ctx.sp_opnd((SIZEOF_VALUE as isize) * 3)); + // Find callee's SP with space for metadata. + // Usually sp+3. + let sp = if let Some(splat_size) = dynamic_splat_size { + // Compute the callee's SP at runtime in case we accept a variable size for the splat array + const _: () = assert!(SIZEOF_VALUE == 8, "opting for a shift since mul on A64 takes no immediates"); + let splat_size_bytes = asm.lshift(splat_size, 3usize.into()); + // 3 items for method metadata, minus one to remove the splat array + let static_stack_top = asm.lea(asm.ctx.sp_opnd(2)); + asm.add(static_stack_top, splat_size_bytes) + } else { + asm.lea(asm.ctx.sp_opnd(3)) + }; let specval = if block_arg_type == Some(Type::BlockParamProxy) { - SpecVal::BlockParamProxy - } else if let Some(block_iseq) = block { - SpecVal::BlockISeq(block_iseq) + SpecVal::BlockHandler(Some(BlockHandler::BlockParamProxy)) } else { - SpecVal::None + SpecVal::BlockHandler(block) }; let mut frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL; @@ -4783,60 +7108,83 @@ fn gen_send_cfunc( frame_type |= VM_FRAME_FLAG_CFRAME_KW } - gen_push_frame(jit, ctx, asm, false, ControlFrame { + perf_call!("gen_send_cfunc: ", gen_push_frame(jit, asm, ControlFrame { frame_type, specval, cme, recv, sp, - pc: Some(0), + pc: if cfg!(feature = "runtime_checks") { + Some(!0) // Poison value. Helps to fail fast. + } else { + None // Leave PC uninitialized as cfuncs shouldn't read it + }, iseq: None, - local_size: 0, - }); + })); + + asm_comment!(asm, "set ec->cfp"); + let new_cfp = asm.lea(Opnd::mem(64, CFP, -(RUBY_SIZEOF_CONTROL_FRAME as i32))); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), new_cfp); if !kw_arg.is_null() { // Build a hash from all kwargs passed - asm.comment("build_kwhash"); + asm_comment!(asm, "build_kwhash"); let imemo_ci = VALUE(ci as usize); assert_ne!(0, unsafe { rb_IMEMO_TYPE_P(imemo_ci, imemo_callinfo) }, "we assume all callinfos with kwargs are on the GC heap"); - let sp = asm.lea(ctx.sp_opnd(0)); + let sp = asm.lea(asm.ctx.sp_opnd(0)); let kwargs = asm.ccall(build_kwhash as *const u8, vec![imemo_ci.into(), sp]); // Replace the stack location at the start of kwargs with the new hash - let stack_opnd = ctx.stack_opnd(argc - passed_argc); + let stack_opnd = asm.stack_opnd(argc - passed_argc); asm.mov(stack_opnd, kwargs); } - // Copy SP because REG_SP will get overwritten - let sp = asm.lea(ctx.sp_opnd(0)); - - // Pop the C function arguments from the stack (in the caller) - ctx.stack_pop((argc + 1).try_into().unwrap()); - // Write interpreter SP into CFP. - // Needed in case the callee yields to the block. 
- gen_save_sp(jit, asm, ctx); + // We don't pop arguments yet to use registers for passing them, but we + // have to set cfp->sp below them for full_cfunc_return() invalidation. + gen_save_sp_with_offset(asm, -(argc + 1) as i8); // Non-variadic method let args = if cfunc_argc >= 0 { // Copy the arguments from the stack to the C argument registers // self is the 0th argument and is at index argc from the stack top (0..=passed_argc).map(|i| - Opnd::mem(64, sp, -(argc + 1 - i) * SIZEOF_VALUE_I32) + asm.stack_opnd(argc - i) ).collect() } // Variadic method else if cfunc_argc == -1 { // The method gets a pointer to the first argument // rb_f_puts(int argc, VALUE *argv, VALUE recv) + + let passed_argc_opnd = if let Some(splat_size) = dynamic_splat_size { + // The final argc is the size of the splat, minus one for the splat array itself + asm.add(splat_size, (passed_argc - 1).into()) + } else { + // Without a splat, passed_argc is static + Opnd::Imm(passed_argc.into()) + }; + vec![ - Opnd::Imm(passed_argc.into()), - asm.lea(Opnd::mem(64, sp, -(argc) * SIZEOF_VALUE_I32)), - Opnd::mem(64, sp, -(argc + 1) * SIZEOF_VALUE_I32), + passed_argc_opnd, + asm.lea(asm.ctx.sp_opnd(-argc)), + asm.stack_opnd(argc), ] } - else { + // Variadic method taking a Ruby array + else if cfunc_argc == -2 { + // Slurp up all the arguments into an array + let stack_args = asm.lea(asm.ctx.sp_opnd(-argc)); + let args_array = asm.ccall( + rb_ec_ary_new_from_values as _, + vec![EC, passed_argc.into(), stack_args] + ); + + // Example signature: + // VALUE neg2_method(VALUE self, VALUE argv) + vec![asm.stack_opnd(argc), args_array] + } else { panic!("unexpected cfunc_args: {}", cfunc_argc) }; @@ -4844,73 +7192,58 @@ fn gen_send_cfunc( // VALUE ret = (cfunc->func)(recv, argv[0], argv[1]); // cfunc comes from compile-time cme->def, which we assume to be stable. // Invalidation logic is in yjit_method_lookup_change() - asm.comment("call C function"); + asm_comment!(asm, "call C function"); let ret = asm.ccall(unsafe { get_mct_func(cfunc) }.cast(), args); + asm.stack_pop((argc + 1).try_into().unwrap()); // Pop arguments after ccall to use registers for passing them. // Record code position for TracePoint patching. See full_cfunc_return(). record_global_inval_patch(asm, CodegenGlobals::get_outline_full_cfunc_return_pos()); // Push the return value on the Ruby stack - let stack_ret = ctx.stack_push(Type::Unknown); + let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, ret); + // Log the name of the method we're calling to. We intentionally don't do this for inlined cfuncs. + // We also do this after the C call to minimize the impact of spill_temps() on asm.ccall(). 
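// A standalone approximation (toy code, not YJIT's actual stats plumbing) of
// the two-step scheme used below: resolve the method name to a stable counter
// index at compile time, then have the generated code bump only that slot at
// runtime via the ccall to incr_cfunc_counter.
fn toy_cfunc_idx(
    indices: &mut std::collections::HashMap<String, usize>,
    counters: &mut Vec<u64>,
    name: &str,
) -> usize {
    *indices.entry(name.to_string()).or_insert_with(|| {
        counters.push(0);      // new counter slot for a cfunc seen for the first time
        counters.len() - 1     // index the jitted code will increment
    })
}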
+ if get_option!(gen_stats) { + // Assemble the method name string + let mid = unsafe { rb_get_def_original_id((*cme).def) }; + let name_str = get_method_name(Some(unsafe { (*cme).owner }), mid); + + // Get an index for this cfunc name + let cfunc_idx = get_cfunc_idx(&name_str); + + // Increment the counter for this cfunc + asm.ccall(incr_cfunc_counter as *const u8, vec![cfunc_idx.into()]); + } + // Pop the stack frame (ec->cfp++) // Instead of recalculating, we can reuse the previous CFP, which is stored in a callee-saved // register - let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP); + let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32); asm.store(ec_cfp_opnd, CFP); // cfunc calls may corrupt types - ctx.clear_local_types(); + asm.clear_local_types(); // Note: the return block of gen_send_iseq() has ctx->sp_offset == 1 // which allows for sharing the same successor. // Jump (fall through) to the call continuation block // We do this to end the current block after the call - jump_to_next_insn(jit, ctx, asm, ocb); - EndBlock + jump_to_next_insn(jit, asm) } -fn gen_return_branch( - asm: &mut Assembler, - target0: CodePtr, - _target1: Option<CodePtr>, - shape: BranchShape, -) { - match shape { - BranchShape::Next0 | BranchShape::Next1 => unreachable!(), - BranchShape::Default => { - asm.comment("update cfp->jit_return"); - asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN), Opnd::const_ptr(target0.raw_ptr())); - } - } -} - -/// Pushes arguments from an array to the stack that are passed with a splat (i.e. *args) -/// It optimistically compiles to a static size that is the exact number of arguments -/// needed for the function. -fn push_splat_args(required_args: u32, ctx: &mut Context, asm: &mut Assembler, ocb: &mut OutlinedCb, side_exit: Target) { - - asm.comment("push_splat_args"); - - let array_opnd = ctx.stack_opnd(0); - let array_reg = asm.load(array_opnd); - - guard_object_is_heap( - asm, - array_reg, - counted_exit!(ocb, side_exit, send_splat_not_array), - ); - guard_object_is_array( - asm, - array_reg, - counted_exit!(ocb, side_exit, send_splat_not_array), - ); - - asm.comment("Get array length for embedded or heap"); +// Generate RARRAY_LEN. For array_opnd, use Opnd::Reg to reduce memory access, +// and use Opnd::Mem to save registers. +fn get_array_len(asm: &mut Assembler, array_opnd: Opnd) -> Opnd { + asm_comment!(asm, "get array length for embedded or heap"); // Pull out the embed flag to check if it's an embedded array. + let array_reg = match array_opnd { + Opnd::InsnOut { .. } => array_opnd, + _ => asm.load(array_opnd), + }; let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS); // Get the length of the array @@ -4921,220 +7254,300 @@ fn push_splat_args(required_args: u32, ctx: &mut Context, asm: &mut Assembler, o let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS); asm.test(flags_opnd, (RARRAY_EMBED_FLAG as u64).into()); - // Need to repeat this here to deal with register allocation - let array_opnd = ctx.stack_opnd(0); - let array_reg = asm.load(array_opnd); - + let array_reg = match array_opnd { + Opnd::InsnOut { .. 
} => array_opnd, + _ => asm.load(array_opnd), + }; let array_len_opnd = Opnd::mem( - (8 * size_of::<std::os::raw::c_long>()) as u8, + std::os::raw::c_long::BITS as u8, array_reg, RUBY_OFFSET_RARRAY_AS_HEAP_LEN, ); - let array_len_opnd = asm.csel_nz(emb_len_opnd, array_len_opnd); - - asm.comment("Side exit if length doesn't not equal remaining args"); - asm.cmp(array_len_opnd, required_args.into()); - asm.jne(counted_exit!(ocb, side_exit, send_splatarray_length_not_equal)); - asm.comment("Check last argument is not ruby2keyword hash"); + // Select the array length value + asm.csel_nz(emb_len_opnd, array_len_opnd) +} - // Need to repeat this here to deal with register allocation - let array_reg = asm.load(ctx.stack_opnd(0)); +// Generate RARRAY_CONST_PTR (part of RARRAY_AREF) +fn get_array_ptr(asm: &mut Assembler, array_reg: Opnd) -> Opnd { + asm_comment!(asm, "get array pointer for embedded or heap"); let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS); asm.test(flags_opnd, (RARRAY_EMBED_FLAG as u64).into()); let heap_ptr_opnd = Opnd::mem( - (8 * size_of::<usize>()) as u8, + usize::BITS as u8, array_reg, RUBY_OFFSET_RARRAY_AS_HEAP_PTR, ); + // Load the address of the embedded array // (struct RArray *)(obj)->as.ary let ary_opnd = asm.lea(Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RARRAY_AS_ARY)); - let ary_opnd = asm.csel_nz(ary_opnd, heap_ptr_opnd); + asm.csel_nz(ary_opnd, heap_ptr_opnd) +} - let last_array_value = asm.load(Opnd::mem(64, ary_opnd, (required_args as i32 - 1) * (SIZEOF_VALUE as i32))); +// Generate RSTRING_PTR +fn get_string_ptr(asm: &mut Assembler, string_reg: Opnd) -> Opnd { + asm_comment!(asm, "get string pointer for embedded or heap"); + + let flags_opnd = Opnd::mem(VALUE_BITS, string_reg, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags_opnd, (RSTRING_NOEMBED as u64).into()); + let heap_ptr_opnd = asm.load(Opnd::mem( + usize::BITS as u8, + string_reg, + RUBY_OFFSET_RSTRING_AS_HEAP_PTR, + )); - guard_object_is_not_ruby2_keyword_hash( + // Load the address of the embedded array + // (struct RString *)(obj)->as.ary + let ary_opnd = asm.lea(Opnd::mem(VALUE_BITS, string_reg, RUBY_OFFSET_RSTRING_AS_ARY)); + asm.csel_nz(heap_ptr_opnd, ary_opnd) +} + +/// Pushes arguments from an array to the stack. Differs from push splat because +/// the array can have items left over. Array is assumed to be T_ARRAY without guards. +fn copy_splat_args_for_rest_callee(array: Opnd, num_args: u32, asm: &mut Assembler) { + asm_comment!(asm, "copy_splat_args_for_rest_callee"); + + // Unused operands cause the backend to panic + if num_args == 0 { + return; + } + + asm_comment!(asm, "Push arguments from array"); + + let array_reg = asm.load(array); + let ary_opnd = get_array_ptr(asm, array_reg); + for i in 0..num_args { + let top = asm.stack_push(Type::Unknown); + asm.mov(top, Opnd::mem(64, ary_opnd, i as i32 * SIZEOF_VALUE_I32)); + } +} + +/// Pushes arguments from an array to the stack that are passed with a splat (i.e. *args) +/// It optimistically compiles to a static size that is the exact number of arguments +/// needed for the function. 
+fn push_splat_args(required_args: u32, asm: &mut Assembler) { + asm_comment!(asm, "push_splat_args"); + + let array_opnd = asm.stack_opnd(0); + guard_object_is_array( asm, - last_array_value, - counted_exit!(ocb, side_exit, send_splatarray_last_ruby_2_keywords)); + array_opnd, + array_opnd.into(), + Counter::guard_send_splat_not_array, + ); + + let array_len_opnd = get_array_len(asm, array_opnd); - asm.comment("Push arguments from array"); - let array_opnd = ctx.stack_pop(1); + asm_comment!(asm, "Guard for expected splat length"); + asm.cmp(array_len_opnd, required_args.into()); + asm.jne(Target::side_exit(Counter::guard_send_splatarray_length_not_equal)); + // Check last element of array if present if required_args > 0 { - // Load the address of the embedded array - // (struct RArray *)(obj)->as.ary - let array_reg = asm.load(array_opnd); + asm_comment!(asm, "Check last argument is not ruby2keyword hash"); - // Conditionally load the address of the heap array - // (struct RArray *)(obj)->as.heap.ptr - let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS); - asm.test(flags_opnd, Opnd::UImm(RARRAY_EMBED_FLAG as u64)); - let heap_ptr_opnd = Opnd::mem( - (8 * size_of::<usize>()) as u8, - array_reg, - RUBY_OFFSET_RARRAY_AS_HEAP_PTR, + // Need to repeat this here to deal with register allocation + let array_reg = asm.load(asm.stack_opnd(0)); + let ary_opnd = get_array_ptr(asm, array_reg); + let last_array_value = asm.load(Opnd::mem(64, ary_opnd, (required_args as i32 - 1) * (SIZEOF_VALUE as i32))); + guard_object_is_not_ruby2_keyword_hash( + asm, + last_array_value, + Counter::guard_send_splatarray_last_ruby2_keywords, ); - // Load the address of the embedded array - // (struct RArray *)(obj)->as.ary - let ary_opnd = asm.lea(Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RARRAY_AS_ARY)); - let ary_opnd = asm.csel_nz(ary_opnd, heap_ptr_opnd); + } + + asm_comment!(asm, "Push arguments from array"); + let array_opnd = asm.stack_pop(1); + + if required_args > 0 { + let array_reg = asm.load(array_opnd); + let ary_opnd = get_array_ptr(asm, array_reg); for i in 0..required_args { - let top = ctx.stack_push(Type::Unknown); + let top = asm.stack_push(Type::Unknown); asm.mov(top, Opnd::mem(64, ary_opnd, i as i32 * SIZEOF_VALUE_I32)); } - asm.comment("end push_each"); + asm_comment!(asm, "end push_each"); } } fn gen_send_bmethod( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, - block: Option<IseqPtr>, + block: Option<BlockHandler>, flags: u32, argc: i32, -) -> CodegenStatus { +) -> Option<CodegenStatus> { let procv = unsafe { rb_get_def_bmethod_proc((*cme).def) }; - let proc = unsafe { rb_yjit_get_proc_ptr(procv) }; + let proc = unsafe { rb_jit_get_proc_ptr(procv) }; let proc_block = unsafe { &(*proc).block }; if proc_block.type_ != block_type_iseq { - return CantCompile; + return None; } let capture = unsafe { proc_block.as_.captured.as_ref() }; let iseq = unsafe { *capture.code.iseq.as_ref() }; - // Optimize for single ractor mode and avoid runtime check for - // "defined with an un-shareable Proc in a different Ractor" - if !assume_single_ractor_mode(jit, ocb) { - gen_counter_incr!(asm, send_bmethod_ractor); - return CantCompile; + if !procv.shareable_p() { + let ractor_serial = unsafe { rb_yjit_cme_ractor_serial(cme) }; + asm_comment!(asm, "guard current ractor == {}", ractor_serial); + let current_ractor_serial = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_RACTOR_ID as i32)); + 
asm.cmp(current_ractor_serial, ractor_serial.into()); + asm.jne(Target::side_exit(Counter::send_bmethod_ractor)); } // Passing a block to a block needs logic different from passing // a block to a method and sometimes requires allocation. Bail for now. if block.is_some() { - gen_counter_incr!(asm, send_bmethod_block_arg); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_bmethod_block_arg); + return None; } let frame_type = VM_FRAME_MAGIC_BLOCK | VM_FRAME_FLAG_BMETHOD | VM_FRAME_FLAG_LAMBDA; - gen_send_iseq(jit, ctx, asm, ocb, iseq, ci, frame_type, Some(capture.ep), cme, block, flags, argc, None) + perf_call! { gen_send_iseq(jit, asm, iseq, ci, frame_type, Some(capture.ep), cme, block, flags, argc, None) } +} + +/// The kind of a value an ISEQ returns +enum IseqReturn { + Value(VALUE), + LocalVariable(u32), + Receiver, +} + +extern "C" { + fn rb_simple_iseq_p(iseq: IseqPtr) -> bool; + fn rb_iseq_only_kwparam_p(iseq: IseqPtr) -> bool; +} + +/// Return the ISEQ's return value if it consists of one simple instruction and leave. +fn iseq_get_return_value(iseq: IseqPtr, captured_opnd: Option<Opnd>, block: Option<BlockHandler>, ci_flags: u32) -> Option<IseqReturn> { + // Expect only two instructions and one possible operand + // NOTE: If an ISEQ has an optional keyword parameter with a default value that requires + // computation, the ISEQ will always have more than two instructions and won't be inlined. + let iseq_size = unsafe { get_iseq_encoded_size(iseq) }; + if !(2..=3).contains(&iseq_size) { + return None; + } + + // Get the first two instructions + let first_insn = iseq_opcode_at_idx(iseq, 0); + let second_insn = iseq_opcode_at_idx(iseq, insn_len(first_insn as usize)); + + // Extract the return value if known + if second_insn != YARVINSN_leave { + return None; + } + match first_insn { + YARVINSN_getlocal_WC_0 => { + // Accept only cases where only positional arguments are used by both the callee and the caller. + // Keyword arguments may be specified by the callee or the caller but not used. + // Reject block ISEQs to avoid autosplat and other block parameter complications. + if captured_opnd.is_some() + // Reject if block ISEQ is present + || block.is_some() + // Equivalent to `VM_CALL_ARGS_SIMPLE - VM_CALL_KWARG - has_block_iseq` + || ci_flags & ( + VM_CALL_ARGS_SPLAT + | VM_CALL_KW_SPLAT + | VM_CALL_ARGS_BLOCKARG + | VM_CALL_FORWARDING + ) != 0 + { + return None; + } + + let ep_offset = unsafe { *rb_iseq_pc_at_idx(iseq, 1) }.as_u32(); + let local_idx = ep_offset_to_local_idx(iseq, ep_offset); + + // Only inline getlocal on a parameter. DCE in the IESQ builder can + // make a two-instruction ISEQ that does not return a parameter. + if local_idx >= unsafe { get_iseq_body_param_size(iseq) } { + return None; + } + + if unsafe { rb_simple_iseq_p(iseq) } { + return Some(IseqReturn::LocalVariable(local_idx)); + } else if unsafe { rb_iseq_only_kwparam_p(iseq) } { + // Inline if only positional parameters are used + if let Ok(i) = i32::try_from(local_idx) { + if i < unsafe { rb_get_iseq_body_param_lead_num(iseq) } { + return Some(IseqReturn::LocalVariable(local_idx)); + } + } + } + + return None; + } + YARVINSN_putnil => Some(IseqReturn::Value(Qnil)), + YARVINSN_putobject => Some(IseqReturn::Value(unsafe { *rb_iseq_pc_at_idx(iseq, 1) })), + YARVINSN_putobject_INT2FIX_0_ => Some(IseqReturn::Value(VALUE::fixnum_from_usize(0))), + YARVINSN_putobject_INT2FIX_1_ => Some(IseqReturn::Value(VALUE::fixnum_from_usize(1))), + // We don't support invokeblock for now. 
Such ISEQs are likely not used by blocks anyway. + YARVINSN_putself if captured_opnd.is_none() => Some(IseqReturn::Receiver), + _ => None, + } } fn gen_send_iseq( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, iseq: *const rb_iseq_t, ci: *const rb_callinfo, frame_type: u32, prev_ep: Option<*const VALUE>, cme: *const rb_callable_method_entry_t, - block: Option<IseqPtr>, + block: Option<BlockHandler>, flags: u32, argc: i32, captured_opnd: Option<Opnd>, -) -> CodegenStatus { +) -> Option<CodegenStatus> { + // Argument count. We will change this as we gather values from + // sources to satisfy the callee's parameters. To help make sense + // of changes, note that: + // - Parameters syntactically on the left have lower addresses. + // For example, all the lead (required) and optional parameters + // have lower addresses than the rest parameter array. + // - The larger the index one passes to Assembler::stack_opnd(), + // the *lower* the address. let mut argc = argc; - // Create a side-exit to fall back to the interpreter - let side_exit = get_side_exit(jit, ocb, ctx); - - // When you have keyword arguments, there is an extra object that gets - // placed on the stack the represents a bitmap of the keywords that were not - // specified at the call site. We need to keep track of the fact that this - // value is present on the stack in order to properly set up the callee's - // stack pointer. - let doing_kw_call = unsafe { get_iseq_flags_has_kw(iseq) }; + // Iseqs with keyword parameters have a hidden, unnamed parameter local + // that the callee could use to know which keywords are unspecified + // (see the `checkkeyword` instruction and check `ruby --dump=insn -e 'def foo(k:itself)=k'`). + // We always need to set up this local if the call goes through. + let has_kwrest = unsafe { get_iseq_flags_has_kwrest(iseq) }; + let doing_kw_call = unsafe { get_iseq_flags_has_kw(iseq) } || has_kwrest; let supplying_kws = unsafe { vm_ci_flag(ci) & VM_CALL_KWARG } != 0; + let iseq_has_rest = unsafe { get_iseq_flags_has_rest(iseq) }; + let iseq_has_block_param = unsafe { get_iseq_flags_has_block(iseq) }; + let arg_setup_block = captured_opnd.is_some(); // arg_setup_type: arg_setup_block (invokeblock) + + // Is this iseq tagged as "forwardable"? Iseqs that take `...` as a + // parameter are tagged as forwardable (e.g. `def foo(...); end`) + let forwarding = unsafe { rb_get_iseq_flags_forwardable(iseq) }; + + // If a "forwardable" iseq has been called with a splat, then we _do not_ + // want to expand the splat to the stack. So we'll only consider this + // a splat call if the callee iseq is not forwardable. For example, + // we do not want to handle the following code: + // + // `def foo(...); end; foo(*blah)` + let splat_call = (flags & VM_CALL_ARGS_SPLAT != 0) && !forwarding; + let kw_splat = (flags & VM_CALL_KW_SPLAT != 0) && !forwarding; - if unsafe { vm_ci_flag(ci) } & VM_CALL_TAILCALL != 0 { - // We can't handle tailcalls - gen_counter_incr!(asm, send_iseq_tailcall); - return CantCompile; - } - - // No support for callees with these parameters yet as they require allocation - // or complex handling. 
- if unsafe { get_iseq_flags_has_rest(iseq) } { - gen_counter_incr!(asm, send_iseq_has_rest); - return CantCompile; - } - if unsafe { get_iseq_flags_has_post(iseq) } { - gen_counter_incr!(asm, send_iseq_has_post); - return CantCompile; - } - if unsafe { get_iseq_flags_has_kwrest(iseq) } { - gen_counter_incr!(asm, send_iseq_has_kwrest); - return CantCompile; - } - - // In order to handle backwards compatibility between ruby 3 and 2 - // ruby2_keywords was introduced. It is called only on methods - // with splat and changes they way they handle them. - // We are just going to not compile these. - // https://www.rubydoc.info/stdlib/core/Proc:ruby2_keywords - if unsafe { - get_iseq_flags_ruby2_keywords(jit.iseq) && flags & VM_CALL_ARGS_SPLAT != 0 - } { - gen_counter_incr!(asm, send_iseq_ruby2_keywords); - return CantCompile; - } - - // If we have keyword arguments being passed to a callee that only takes - // positionals, then we need to allocate a hash. For now we're going to - // call that too complex and bail. - if supplying_kws && !unsafe { get_iseq_flags_has_kw(iseq) } { - gen_counter_incr!(asm, send_iseq_has_no_kw); - return CantCompile; - } - - // If we have a method accepting no kwargs (**nil), exit if we have passed - // it any kwargs. - if supplying_kws && unsafe { get_iseq_flags_accepts_no_kwarg(iseq) } { - gen_counter_incr!(asm, send_iseq_accepts_no_kwarg); - return CantCompile; - } - - // For computing number of locals to set up for the callee - let mut num_params = unsafe { get_iseq_body_param_size(iseq) }; - - // Block parameter handling. This mirrors setup_parameters_complex(). - if unsafe { get_iseq_flags_has_block(iseq) } { - if unsafe { get_iseq_body_local_iseq(iseq) == iseq } { - num_params -= 1; - } else { - // In this case (param.flags.has_block && local_iseq != iseq), - // the block argument is setup as a local variable and requires - // materialization (allocation). Bail. - gen_counter_incr!(asm, send_iseq_materialized_block); - return CantCompile; - } - } - + // For computing offsets to callee locals + let num_params = unsafe { get_iseq_body_param_size(iseq) as i32 }; + let num_locals = unsafe { get_iseq_body_local_table_size(iseq) as i32 }; - if flags & VM_CALL_ARGS_SPLAT != 0 && flags & VM_CALL_ZSUPER != 0 { - // zsuper methods are super calls without any arguments. - // They are also marked as splat, but don't actually have an array - // they pull arguments from, instead we need to change to call - // a different method with the current stack. - gen_counter_incr!(asm, send_iseq_zsuper); - return CantCompile; - } - - let mut start_pc_offset = 0; + let mut start_pc_offset: u16 = 0; let required_num = unsafe { get_iseq_body_param_lead_num(iseq) }; // This struct represents the metadata about the caller-specified @@ -5146,392 +7559,623 @@ fn gen_send_iseq( unsafe { get_cikw_keyword_len(kw_arg) } }; - // Arity handling and optional parameter setup - let opts_filled = argc - required_num - kw_arg_num; + // Arity handling and optional parameter setup for positional arguments. + // Splats are handled later. + let mut opts_filled = argc - required_num - kw_arg_num - i32::from(kw_splat) - i32::from(splat_call); let opt_num = unsafe { get_iseq_body_param_opt_num(iseq) }; - let opts_missing: i32 = opt_num - opts_filled; - - - if opt_num > 0 && flags & VM_CALL_ARGS_SPLAT != 0 { - gen_counter_incr!(asm, send_iseq_splat_with_opt); - return CantCompile; + // With a rest parameter or a yield to a block, + // callers can pass more than required + optional. 
+ // So we cap ops_filled at opt_num. + if iseq_has_rest || arg_setup_block { + opts_filled = min(opts_filled, opt_num); } + let mut opts_missing: i32 = opt_num - opts_filled; - if doing_kw_call && flags & VM_CALL_ARGS_SPLAT != 0 { - gen_counter_incr!(asm, send_iseq_splat_with_kw); - return CantCompile; + let block_arg = flags & VM_CALL_ARGS_BLOCKARG != 0; + // Stack index of the splat array + let splat_pos = i32::from(block_arg) + i32::from(kw_splat) + kw_arg_num; + + exit_if_stack_too_large(iseq)?; + exit_if_tail_call(jit, asm, ci)?; + exit_if_has_post(jit, asm, iseq)?; + exit_if_kwsplat_non_nil(jit, asm, flags, Counter::send_iseq_kw_splat_non_nil)?; + exit_if_has_rest_and_captured(jit, asm, iseq_has_rest, captured_opnd)?; + exit_if_has_kwrest_and_captured(jit, asm, has_kwrest, captured_opnd)?; + exit_if_has_rest_and_supplying_kws(jit, asm, iseq_has_rest, supplying_kws)?; + exit_if_supplying_kw_and_has_no_kw(jit, asm, supplying_kws, doing_kw_call)?; + exit_if_supplying_kws_and_accept_no_kwargs(jit, asm, supplying_kws, iseq)?; + exit_if_doing_kw_and_splat(jit, asm, doing_kw_call, flags)?; + if !forwarding { + exit_if_wrong_number_arguments(jit, asm, arg_setup_block, opts_filled, flags, opt_num, iseq_has_rest)?; + } + exit_if_doing_kw_and_opts_missing(jit, asm, doing_kw_call, opts_missing)?; + exit_if_has_rest_and_optional_and_block(jit, asm, iseq_has_rest, opt_num, iseq, block_arg)?; + if forwarding && flags & VM_CALL_OPT_SEND != 0 { + gen_counter_incr(jit, asm, Counter::send_iseq_send_forwarding); + return None; + } + let block_arg_type = exit_if_unsupported_block_arg_type(jit, asm, block_arg)?; + + // Bail if we can't drop extra arguments for a yield by just popping them + if supplying_kws && arg_setup_block && argc > (kw_arg_num + required_num + opt_num) { + gen_counter_incr(jit, asm, Counter::send_iseq_complex_discard_extras); + return None; } - if opts_filled < 0 && flags & VM_CALL_ARGS_SPLAT == 0 { - // Too few arguments and no splat to make up for it - gen_counter_incr!(asm, send_iseq_arity_error); - return CantCompile; + // Block parameter handling. This mirrors setup_parameters_complex(). + if iseq_has_block_param { + if unsafe { get_iseq_body_local_iseq(iseq) == iseq } { + // Do nothing + } else { + // In this case (param.flags.has_block && local_iseq != iseq), + // the block argument is setup as a local variable and requires + // materialization (allocation). Bail. + gen_counter_incr(jit, asm, Counter::send_iseq_materialized_block); + return None; + } } - if opts_filled > opt_num { - // Too many arguments - gen_counter_incr!(asm, send_iseq_arity_error); - return CantCompile; + // Check that required keyword arguments are supplied and find any extras + // that should go into the keyword rest parameter (**kw_rest). + if doing_kw_call { + gen_iseq_kw_call_checks(jit, asm, iseq, kw_arg, has_kwrest, kw_arg_num)?; } - let block_arg = flags & VM_CALL_ARGS_BLOCKARG != 0; - let block_arg_type = if block_arg { - Some(ctx.get_opnd_type(StackOpnd(0))) + let splat_array_length = if splat_call { + let array = jit.peek_at_stack(&asm.ctx, splat_pos as isize); + let array_length = if array == Qnil { + 0 + } else if unsafe { !RB_TYPE_P(array, RUBY_T_ARRAY) } { + gen_counter_incr(jit, asm, Counter::send_iseq_splat_not_array); + return None; + } else { + unsafe { rb_jit_array_len(array) as u32} + }; + + // Arity check accounting for size of the splat. 
When callee has rest parameters, we insert
+        // runtime guards later in copy_splat_args_for_rest_callee()
+        if !iseq_has_rest {
+            let supplying = argc - 1 - i32::from(kw_splat) + array_length as i32;
+            if (required_num..=required_num + opt_num).contains(&supplying) == false {
+                gen_counter_incr(jit, asm, Counter::send_iseq_splat_arity_error);
+                return None;
+            }
+        }
+
+        if iseq_has_rest && opt_num > 0 {
+            // If we have rest and optional arguments,
+            // we are going to set the pc_offset for where
+            // to jump in the called method.
+            // If the number of args changes, that offset would need to
+            // change too, and we don't change it dynamically, so we side exit.
+            // On a normal splat without rest and optional args this is handled
+            // elsewhere depending on the case.
+            asm_comment!(asm, "Side exit if length doesn't equal compile time length");
+            let array_len_opnd = get_array_len(asm, asm.stack_opnd(splat_pos));
+            asm.cmp(array_len_opnd, array_length.into());
+            asm.jne(Target::side_exit(Counter::guard_send_splatarray_length_not_equal));
+        }
+
+        Some(array_length)
    } else {
        None
    };
-    match block_arg_type {
-        Some(Type::Nil | Type::BlockParamProxy) => {
-            // We'll handle this later
-        }
-        None => {
-            // Nothing to do
+    // Check if we need the arg0 splat handling of vm_callee_setup_block_arg()
+    // Also known as "autosplat" inside setup_parameters_complex().
+    // Autosplat checks argc == 1 after splat and kwsplat processing, so make
+    // sure to amend this if we start supporting kw_splat.
+    let block_arg0_splat = arg_setup_block
+        && (argc == 1 || (argc == 2 && splat_array_length == Some(0)))
+        && !supplying_kws && !doing_kw_call
+        && unsafe {
+            (get_iseq_flags_has_lead(iseq) || opt_num > 1)
+                && !get_iseq_flags_ambiguous_param0(iseq)
+        };
+    if block_arg0_splat {
+        // If block_arg0_splat, we still need side exits after splat, but
+        // the splat modifies the stack which breaks side exits. So bail out.
+        if splat_call {
+            gen_counter_incr(jit, asm, Counter::invokeblock_iseq_arg0_args_splat);
+            return None;
        }
-        _ => {
-            gen_counter_incr!(asm, send_block_arg);
-            return CantCompile;
+        // The block_arg0_splat implementation cannot deal with optional parameters.
+        // This is a setup_parameters_complex() situation and interacts with the
+        // starting position of the callee.
+        if opt_num > 1 {
+            gen_counter_incr(jit, asm, Counter::invokeblock_iseq_arg0_optional);
+            return None;
+        }
    }
-    // If we have unfilled optional arguments and keyword arguments then we
-    // would need to adjust the arguments location to account for that.
-    // For now we aren't handling this case.
-    if doing_kw_call && opts_missing > 0 {
-        gen_counter_incr!(asm, send_iseq_missing_optional_kw);
-        return CantCompile;
+    // Adjust `opts_filled` and `opts_missing` taking
+    // into account the size of the splat expansion.
+    if let Some(len) = splat_array_length {
+        assert_eq!(kw_arg_num, 0); // Due to exit_if_doing_kw_and_splat().
+        // Simplifies calculation below.
+        let num_args = argc - 1 - i32::from(kw_splat) + len as i32;
+
+        opts_filled = if num_args >= required_num {
+            min(num_args - required_num, opt_num)
+        } else {
+            0
+        };
+        opts_missing = opt_num - opts_filled;
    }
+    assert_eq!(opts_missing + opts_filled, opt_num);
+    assert!(opts_filled >= 0);
+
+    // ISeqs with optional parameters start at different
+    // locations depending on the number of optionals given.
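// A toy model (not YJIT code) of the entry-point selection below: for
// `def m(a, b = 1, c = 2)` the ISEQ records one entry point per count of
// supplied optionals (0, 1 or 2) in its opt_table, which has opt_num + 1
// entries. Because opts_filled is known at compile time, the JIT can start the
// callee past the default-value code that is already satisfied.
fn toy_start_pc(opt_table: &[u16], opts_filled: usize) -> u16 {
    opt_table[opts_filled] // index by how many optionals the caller supplied
}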
if opt_num > 0 { - num_params -= opts_missing as u32; + assert!(opts_filled >= 0); unsafe { let opt_table = get_iseq_body_param_opt_table(iseq); - start_pc_offset = (*opt_table.offset(opts_filled as isize)).as_u32(); + start_pc_offset = opt_table.offset(opts_filled as isize).read().try_into().unwrap(); } } - if doing_kw_call { - // Here we're calling a method with keyword arguments and specifying - // keyword arguments at this call site. - - // This struct represents the metadata about the callee-specified - // keyword parameters. - let keyword = unsafe { get_iseq_body_param_keyword(iseq) }; - let keyword_num: usize = unsafe { (*keyword).num }.try_into().unwrap(); - let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap(); - - let mut required_kwargs_filled = 0; - - if keyword_num > 30 { - // We have so many keywords that (1 << num) encoded as a FIXNUM - // (which shifts it left one more) no longer fits inside a 32-bit - // immediate. - gen_counter_incr!(asm, send_iseq_too_many_kwargs); - return CantCompile; - } - - // Check that the kwargs being passed are valid - if supplying_kws { - // This is the list of keyword arguments that the callee specified - // in its initial declaration. - // SAFETY: see compile.c for sizing of this slice. - let callee_kwargs = unsafe { slice::from_raw_parts((*keyword).table, keyword_num) }; - - // Here we're going to build up a list of the IDs that correspond to - // the caller-specified keyword arguments. If they're not in the - // same order as the order specified in the callee declaration, then - // we're going to need to generate some code to swap values around - // on the stack. - let kw_arg_keyword_len: usize = - unsafe { get_cikw_keyword_len(kw_arg) }.try_into().unwrap(); - let mut caller_kwargs: Vec<ID> = vec![0; kw_arg_keyword_len]; - for kwarg_idx in 0..kw_arg_keyword_len { - let sym = unsafe { get_cikw_keywords_idx(kw_arg, kwarg_idx.try_into().unwrap()) }; - caller_kwargs[kwarg_idx] = unsafe { rb_sym2id(sym) }; - } + // Increment total ISEQ send count + gen_counter_incr(jit, asm, Counter::num_send_iseq); - // First, we're going to be sure that the names of every - // caller-specified keyword argument correspond to a name in the - // list of callee-specified keyword parameters. - for caller_kwarg in caller_kwargs { - let search_result = callee_kwargs - .iter() - .enumerate() // inject element index - .find(|(_, &kwarg)| kwarg == caller_kwarg); - - match search_result { - None => { - // If the keyword was never found, then we know we have a - // mismatch in the names of the keyword arguments, so we need to - // bail. - gen_counter_incr!(asm, send_iseq_kwargs_mismatch); - return CantCompile; - } - Some((callee_idx, _)) if callee_idx < keyword_required_num => { - // Keep a count to ensure all required kwargs are specified - required_kwargs_filled += 1; - } - _ => (), + // Shortcut for special `Primitive.attr! 
:leaf` builtins + let builtin_attrs = unsafe { rb_jit_iseq_builtin_attrs(iseq) }; + let builtin_func_raw = unsafe { rb_yjit_builtin_function(iseq) }; + let builtin_func = if builtin_func_raw.is_null() { None } else { Some(builtin_func_raw) }; + let opt_send_call = flags & VM_CALL_OPT_SEND != 0; // .send call is not currently supported for builtins + if let (None, Some(builtin_info), true, false, None | Some(0)) = + (block, builtin_func, builtin_attrs & BUILTIN_ATTR_LEAF != 0, opt_send_call, splat_array_length) { + let builtin_argc = unsafe { (*builtin_info).argc }; + if builtin_argc + 1 < (C_ARG_OPNDS.len() as i32) { + // We pop the block arg without using it because: + // - the builtin is leaf, so it promises to not `yield`. + // - no leaf builtins have block param at the time of writing, and + // adding one requires interpreter changes to support. + if block_arg_type.is_some() { + if iseq_has_block_param { + gen_counter_incr(jit, asm, Counter::send_iseq_leaf_builtin_block_arg_block_param); + return None; } + asm.stack_pop(1); } - } - assert!(required_kwargs_filled <= keyword_required_num); - if required_kwargs_filled != keyword_required_num { - gen_counter_incr!(asm, send_iseq_kwargs_mismatch); - return CantCompile; - } - } - - // Number of locals that are not parameters - let num_locals = unsafe { get_iseq_body_local_table_size(iseq) as i32 } - (num_params as i32); - // Check for interrupts - gen_check_ints(asm, side_exit); - - match block_arg_type { - Some(Type::Nil) => { - // We have a nil block arg, so let's pop it off the args - ctx.stack_pop(1); - } - Some(Type::BlockParamProxy) => { - // We don't need the actual stack value - ctx.stack_pop(1); - } - None => { - // Nothing to do - } - _ => { - assert!(false); - } - } - - let leaf_builtin_raw = unsafe { rb_leaf_builtin_function(iseq) }; - let leaf_builtin: Option<*const rb_builtin_function> = if leaf_builtin_raw.is_null() { - None - } else { - Some(leaf_builtin_raw) - }; - if let (None, Some(builtin_info)) = (block, leaf_builtin) { + // Pop empty kw_splat hash which passes nothing (exit_if_kwsplat_non_nil()) + if kw_splat { + asm.stack_pop(1); + } - // this is a .send call not currently supported for builtins - if flags & VM_CALL_OPT_SEND != 0 { - gen_counter_incr!(asm, send_send_builtin); - return CantCompile; - } + // Pop empty splat array which passes nothing + if let Some(0) = splat_array_length { + asm.stack_pop(1); + } - let builtin_argc = unsafe { (*builtin_info).argc }; - if builtin_argc + 1 < (C_ARG_OPNDS.len() as i32) { - asm.comment("inlined leaf builtin"); + asm_comment!(asm, "inlined leaf builtin"); + gen_counter_incr(jit, asm, Counter::num_send_iseq_leaf); - // Save the PC and SP because the callee may allocate - // e.g. Integer#abs on a bignum - jit_prepare_routine_call(jit, ctx, asm); + // The callee may allocate, e.g. Integer#abs on a Bignum. + // Save SP for GC, save PC for allocation tracing, and prepare + // for global invalidation after GC's VM lock contention. + jit_prepare_call_with_gc(jit, asm); // Call the builtin func (ec, recv, arg1, arg2, ...) 
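// Leaf builtins are plain C functions taking the execution context, then the
// receiver, then each argument as a VALUE, which is the order the args vector
// below is built in. A toy signature for a one-argument builtin, purely
// illustrative (not a real CRuby symbol):
#[allow(dead_code)]
unsafe extern "C" fn toy_leaf_builtin(_ec: *mut std::ffi::c_void, _recv: u64, arg: u64) -> u64 {
    // A real builtin would operate on VALUEs; this toy just echoes an argument.
    arg
}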
let mut args = vec![EC]; // Copy self and arguments for i in 0..=builtin_argc { - let stack_opnd = ctx.stack_opnd(builtin_argc - i); + let stack_opnd = asm.stack_opnd(builtin_argc - i); args.push(stack_opnd); } - ctx.stack_pop((builtin_argc + 1).try_into().unwrap()); let val = asm.ccall(unsafe { (*builtin_info).func_ptr as *const u8 }, args); + asm.stack_pop((builtin_argc + 1).try_into().unwrap()); // Keep them on stack during ccall for GC // Push the return value - let stack_ret = ctx.stack_push(Type::Unknown); + let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, val); // Note: assuming that the leaf builtin doesn't change local variables here. // Seems like a safe assumption. - return KeepCompiling; + // Let guard chains share the same successor + return jump_to_next_insn(jit, asm); + } + } + + // Inline simple ISEQs whose return value is known at compile time + if let (Some(value), None, false) = (iseq_get_return_value(iseq, captured_opnd, block, flags), block_arg_type, opt_send_call) { + asm_comment!(asm, "inlined simple ISEQ"); + gen_counter_incr(jit, asm, Counter::num_send_iseq_inline); + + match value { + IseqReturn::LocalVariable(local_idx) => { + // Put the local variable at the return slot + let stack_local = asm.stack_opnd(argc - 1 - local_idx as i32); + let stack_return = asm.stack_opnd(argc); + asm.mov(stack_return, stack_local); + + // Update the mapping for the return value + let mapping = asm.ctx.get_opnd_mapping(stack_local.into()); + asm.ctx.set_opnd_mapping(stack_return.into(), mapping); + + // Pop everything but the return value + asm.stack_pop(argc as usize); + } + IseqReturn::Value(value) => { + // Pop receiver and arguments + asm.stack_pop(argc as usize + if captured_opnd.is_some() { 0 } else { 1 }); + + // Push the return value + let stack_ret = asm.stack_push(Type::from(value)); + asm.mov(stack_ret, value.into()); + }, + IseqReturn::Receiver => { + // Just pop arguments and leave the receiver on stack + asm.stack_pop(argc as usize); + } } + + // Let guard chains share the same successor + return jump_to_next_insn(jit, asm); } // Stack overflow check // Note that vm_push_frame checks it against a decremented cfp, hence the multiply by 2. // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin) - asm.comment("stack overflow check"); + asm_comment!(asm, "stack overflow check"); + const _: () = assert!(RUBY_SIZEOF_CONTROL_FRAME % SIZEOF_VALUE == 0, "sizeof(rb_control_frame_t) is a multiple of sizeof(VALUE)"); let stack_max: i32 = unsafe { get_iseq_body_stack_max(iseq) }.try_into().unwrap(); - let locals_offs = - SIZEOF_VALUE_I32 * (num_locals + stack_max) + 2 * (RUBY_SIZEOF_CONTROL_FRAME as i32); - let stack_limit = asm.lea(ctx.sp_opnd(locals_offs as isize)); + let locals_offs = (num_locals + stack_max) + 2 * (RUBY_SIZEOF_CONTROL_FRAME / SIZEOF_VALUE) as i32; + let stack_limit = asm.lea(asm.ctx.sp_opnd(locals_offs)); asm.cmp(CFP, stack_limit); - asm.jbe(counted_exit!(ocb, side_exit, send_se_cf_overflow)); - - // push_splat_args does stack manipulation so we can no longer side exit - if flags & VM_CALL_ARGS_SPLAT != 0 { - let required_args = num_params - (argc as u32 - 1); - // We are going to assume that the splat fills - // all the remaining arguments. In the generated code - // we test if this is true and if not side exit. 
- argc = num_params as i32; - push_splat_args(required_args, ctx, asm, ocb, side_exit) - } + asm.jbe(Target::side_exit(Counter::guard_send_se_cf_overflow)); + + if iseq_has_rest && splat_call { + // Insert length guard for a call to copy_splat_args_for_rest_callee() + // that will come later. We will have made changes to + // the stack by spilling or handling __send__ shifting + // by the time we get to that code, so we need the + // guard here where we can still side exit. + let non_rest_arg_count = argc - i32::from(kw_splat) - 1; + if non_rest_arg_count < required_num + opt_num { + let take_count: u32 = (required_num - non_rest_arg_count + opts_filled) + .try_into().unwrap(); + + if take_count > 0 { + asm_comment!(asm, "guard splat_array_length >= {take_count}"); + + let splat_array = asm.stack_opnd(splat_pos); + let array_len_opnd = get_array_len(asm, splat_array); + asm.cmp(array_len_opnd, take_count.into()); + asm.jl(Target::side_exit(Counter::guard_send_iseq_has_rest_and_splat_too_few)); + } + } - // This is a .send call and we need to adjust the stack - if flags & VM_CALL_OPT_SEND != 0 { - handle_opt_send_shift_stack(asm, argc, ctx); + // All splats need to guard for ruby2_keywords hash. Check with a function call when + // splatting into a rest param since the index for the last item in the array is dynamic. + asm_comment!(asm, "guard no ruby2_keywords hash in splat"); + let bad_splat = asm.ccall(rb_yjit_ruby2_keywords_splat_p as _, vec![asm.stack_opnd(splat_pos)]); + asm.cmp(bad_splat, 0.into()); + asm.jnz(Target::side_exit(Counter::guard_send_splatarray_last_ruby2_keywords)); } - if doing_kw_call { - // Here we're calling a method with keyword arguments and specifying - // keyword arguments at this call site. + match block_arg_type { + Some(BlockArg::Nil) => { + // We have a nil block arg, so let's pop it off the args + asm.stack_pop(1); + } + Some(BlockArg::BlockParamProxy) => { + // We don't need the actual stack value + asm.stack_pop(1); + } + Some(BlockArg::TProc) => { + // Place the proc as the block handler. We do this early because + // the block arg being at the top of the stack gets in the way of + // rest param handling later. Also, since there are C calls that + // come later, we can't hold this value in a register and place it + // near the end when we push a new control frame. + asm_comment!(asm, "guard block arg is a proc"); + // Simple predicate, no need for jit_prepare_non_leaf_call(). 
+ let is_proc = asm.ccall(rb_obj_is_proc as _, vec![asm.stack_opnd(0)]); + asm.cmp(is_proc, Qfalse.into()); + jit_chain_guard( + JCC_JE, + jit, + asm, + SEND_MAX_DEPTH, + Counter::guard_send_block_arg_type, + ); - // Number of positional arguments the callee expects before the first - // keyword argument - let args_before_kw = required_num + opt_num; + // If this is a forwardable iseq, adjust the stack size accordingly + let callee_ep = if forwarding { + -1 + num_locals + VM_ENV_DATA_SIZE as i32 + } else { + -argc + num_locals + VM_ENV_DATA_SIZE as i32 - 1 + }; + let callee_specval = callee_ep + VM_ENV_DATA_INDEX_SPECVAL; + if callee_specval < 0 { + // Can't write to sp[-n] since that's where the arguments are + gen_counter_incr(jit, asm, Counter::send_iseq_clobbering_block_arg); + return None; + } + if iseq_has_rest || has_kwrest { + // The proc would be stored above the current stack top, where GC can't see it + gen_counter_incr(jit, asm, Counter::send_iseq_block_arg_gc_unsafe); + return None; + } + let proc = asm.stack_pop(1); // Pop first, as argc doesn't account for the block arg + let callee_specval = asm.ctx.sp_opnd(callee_specval); + asm.store(callee_specval, proc); + } + None => { + // Nothing to do + } + } - // This struct represents the metadata about the caller-specified - // keyword arguments. - let ci_kwarg = unsafe { vm_ci_kwarg(ci) }; - let caller_keyword_len: usize = if ci_kwarg.is_null() { - 0 - } else { - unsafe { get_cikw_keyword_len(ci_kwarg) } - .try_into() - .unwrap() - }; + if kw_splat { + // Only `**nil` is supported right now. Checked in exit_if_kwsplat_non_nil() + assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0))); + asm.stack_pop(1); + argc -= 1; + } - // This struct represents the metadata about the callee-specified - // keyword parameters. - let keyword = unsafe { get_iseq_body_param_keyword(iseq) }; + // push_splat_args does stack manipulation so we can no longer side exit + if let Some(array_length) = splat_array_length { + if !iseq_has_rest { + // Speculate that future splats will be done with + // an array that has the same length. We will insert guards. + argc = argc - 1 + array_length as i32; + if argc + asm.ctx.get_stack_size() as i32 > MAX_SPLAT_LENGTH { + gen_counter_incr(jit, asm, Counter::send_splat_too_long); + return None; + } + push_splat_args(array_length, asm); + } + } - asm.comment("keyword args"); + // This is a .send call and we need to adjust the stack + // TODO: This can be more efficient if we do it before + // extracting from the splat array above. + if flags & VM_CALL_OPT_SEND != 0 { + handle_opt_send_shift_stack(asm, argc); + } + + if iseq_has_rest { + // We are going to allocate so setting pc and sp. + jit_save_pc(jit, asm); + gen_save_sp(asm); + + let rest_param_array = if splat_call { + let non_rest_arg_count = argc - 1; + // We start by dupping the array because someone else might have + // a reference to it. This also normalizes to an ::Array instance. + let array = asm.stack_opnd(0); + let array = asm.ccall( + rb_ary_dup as *const u8, + vec![array], + ); + asm.stack_pop(1); // Pop array after ccall to use a register for passing it. + + // This is the end stack state of all `non_rest_arg_count` situations below + argc = required_num + opts_filled; + + if non_rest_arg_count > required_num + opt_num { + // If we have more arguments than required, we need to prepend + // the items from the stack onto the array. 
+ let diff: u32 = (non_rest_arg_count - (required_num + opt_num)) + .try_into().unwrap(); + + // diff is >0 so no need to worry about null pointer + asm_comment!(asm, "load pointer to array elements"); + let values_opnd = asm.ctx.sp_opnd(-(diff as i32)); + let values_ptr = asm.lea(values_opnd); + + asm_comment!(asm, "prepend stack values to rest array"); + let array = asm.ccall( + rb_ary_unshift_m as *const u8, + vec![Opnd::UImm(diff as u64), values_ptr, array], + ); + asm.stack_pop(diff as usize); - // This is the list of keyword arguments that the callee specified - // in its initial declaration. - let callee_kwargs = unsafe { (*keyword).table }; - let total_kwargs: usize = unsafe { (*keyword).num }.try_into().unwrap(); + array + } else if non_rest_arg_count < required_num + opt_num { + // If we have fewer arguments than required, we need to take some + // from the array and move them to the stack. + asm_comment!(asm, "take items from splat array"); - // Here we're going to build up a list of the IDs that correspond to - // the caller-specified keyword arguments. If they're not in the - // same order as the order specified in the callee declaration, then - // we're going to need to generate some code to swap values around - // on the stack. - let mut caller_kwargs: Vec<ID> = vec![0; total_kwargs]; + let take_count: u32 = (required_num - non_rest_arg_count + opts_filled) + .try_into().unwrap(); - for kwarg_idx in 0..caller_keyword_len { - let sym = unsafe { get_cikw_keywords_idx(ci_kwarg, kwarg_idx.try_into().unwrap()) }; - caller_kwargs[kwarg_idx] = unsafe { rb_sym2id(sym) }; - } - let mut kwarg_idx = caller_keyword_len; + // Copy required arguments to the stack without modifying the array + copy_splat_args_for_rest_callee(array, take_count, asm); - let mut unspecified_bits = 0; + // We will now slice the array to give us a new array of the correct size + let sliced = asm.ccall(rb_yjit_rb_ary_subseq_length as *const u8, vec![array, Opnd::UImm(take_count.into())]); - let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap(); - for callee_idx in keyword_required_num..total_kwargs { - let mut already_passed = false; - let callee_kwarg = unsafe { *(callee_kwargs.offset(callee_idx.try_into().unwrap())) }; + sliced + } else { + // The arguments are equal so we can just push to the stack + asm_comment!(asm, "same length for splat array and rest param"); + assert!(non_rest_arg_count == required_num + opt_num); - for caller_idx in 0..caller_keyword_len { - if caller_kwargs[caller_idx] == callee_kwarg { - already_passed = true; - break; - } + array } + } else { + asm_comment!(asm, "rest parameter without splat"); + + assert!(argc >= required_num); + let n = (argc - required_num - opts_filled) as u32; + argc = required_num + opts_filled; + // If n is 0, then elts is never going to be read, so we can just pass null + let values_ptr = if n == 0 { + Opnd::UImm(0) + } else { + asm_comment!(asm, "load pointer to array elements"); + let values_opnd = asm.ctx.sp_opnd(-(n as i32)); + asm.lea(values_opnd) + }; - if !already_passed { - // Reserve space on the stack for each default value we'll be - // filling in (which is done in the next loop). Also increments - // argc so that the callee's SP is recorded correctly. - argc += 1; - let default_arg = ctx.stack_push(Type::Unknown); - - // callee_idx - keyword->required_num is used in a couple of places below. 
- let req_num: isize = unsafe { (*keyword).required_num }.try_into().unwrap(); - let callee_idx_isize: isize = callee_idx.try_into().unwrap(); - let extra_args = callee_idx_isize - req_num; - - //VALUE default_value = keyword->default_values[callee_idx - keyword->required_num]; - let mut default_value = unsafe { *((*keyword).default_values.offset(extra_args)) }; - - if default_value == Qundef { - // Qundef means that this value is not constant and must be - // recalculated at runtime, so we record it in unspecified_bits - // (Qnil is then used as a placeholder instead of Qundef). - unspecified_bits |= 0x01 << extra_args; - default_value = Qnil; - } + let new_ary = asm.ccall( + rb_ec_ary_new_from_values as *const u8, + vec![ + EC, + Opnd::UImm(n.into()), + values_ptr + ] + ); + asm.stack_pop(n.as_usize()); - asm.mov(default_arg, default_value.into()); + new_ary + }; - caller_kwargs[kwarg_idx] = callee_kwarg; - kwarg_idx += 1; - } + // Find where to put the rest parameter array + let rest_param = if opts_missing == 0 { + // All optionals are filled, the rest param goes at the top of the stack + argc += 1; + asm.stack_push(Type::TArray) + } else { + // The top of the stack will be a missing optional, but the rest + // parameter needs to be placed after all the missing optionals. + // Place it using a stack operand with a negative stack index. + // (Higher magnitude negative stack index have higher address.) + assert!(opts_missing > 0); + // The argument deepest in the stack will be the 0th local in the callee. + let callee_locals_base = argc - 1; + let rest_param_stack_idx = callee_locals_base - required_num - opt_num; + assert!(rest_param_stack_idx < 0); + asm.stack_opnd(rest_param_stack_idx) + }; + // Store rest param to memory to avoid register shuffle as + // we won't be reading it for the remainder of the block. + asm.ctx.dealloc_reg(rest_param.reg_opnd()); + asm.store(rest_param, rest_param_array); + } + + // Pop surplus positional arguments when yielding + if arg_setup_block { + let extras = argc - required_num - opt_num - kw_arg_num; + if extras > 0 { + // Checked earlier. If there are keyword args, then + // the positional arguments are not at the stack top. + assert_eq!(0, kw_arg_num); + + asm.stack_pop(extras as usize); + argc = required_num + opt_num + kw_arg_num; } + } - assert!(kwarg_idx == total_kwargs); + // Keyword argument passing + if doing_kw_call { + argc = gen_iseq_kw_call(jit, asm, kw_arg, iseq, argc, has_kwrest); + } + + // Same as vm_callee_setup_block_arg_arg0_check and vm_callee_setup_block_arg_arg0_splat + // on vm_callee_setup_block_arg for arg_setup_block. This is done after CALLER_SETUP_ARG + // and CALLER_REMOVE_EMPTY_KW_SPLAT, so this implementation is put here. This may need + // side exits, so you still need to allow side exits here if block_arg0_splat is true. + // Note that you can't have side exits after this arg0 splat. 
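Editorial aside: the rest-parameter handling in this hunk has three shapes, chosen by how many non-rest arguments were supplied next to the splat: prepend the surplus onto a dup of the splat array, take the shortfall from the front of the splat, or use the splat as-is. The sketch below is a pure-Rust toy model of those three cases on plain Vec<i64> values, with no optional parameters (so opts_filled is implicitly zero); it is an illustration of the shape, not the register-level codegen.

/// Toy model: split positional args plus a splatted array into
/// (fixed args for the callee, rest array). `required` plays the role of
/// required_num with opt_num == 0.
fn split_for_rest(mut positional: Vec<i64>, mut splat: Vec<i64>, required: usize) -> (Vec<i64>, Vec<i64>) {
    if positional.len() > required {
        // Surplus fixed arguments are prepended onto the (dup'd) splat array,
        // mirroring the rb_ary_unshift_m call in the patch.
        let extra = positional.split_off(required);
        let mut rest = extra;
        rest.extend(splat);
        (positional, rest)
    } else if positional.len() < required {
        // Not enough fixed arguments: take the shortfall from the front of the
        // splat (copy_splat_args_for_rest_callee plus the array slice in the patch).
        let take = required - positional.len();
        assert!(splat.len() >= take, "guarded earlier by the splat length check");
        let rest = splat.split_off(take);
        positional.extend(splat);
        (positional, rest)
    } else {
        // Exactly enough: the splat array itself becomes the rest parameter.
        (positional, splat)
    }
}

fn main() {
    assert_eq!(split_for_rest(vec![1, 2, 3], vec![4, 5], 2), (vec![1, 2], vec![3, 4, 5]));
    assert_eq!(split_for_rest(vec![1], vec![2, 3, 4], 2), (vec![1, 2], vec![3, 4]));
    assert_eq!(split_for_rest(vec![1, 2], vec![3], 2), (vec![1, 2], vec![3]));
}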
+ if block_arg0_splat { + let arg0_opnd = asm.stack_opnd(0); + + // Only handle the case that you don't need to_ary conversion + let not_array_counter = Counter::invokeblock_iseq_arg0_not_array; + guard_object_is_array(asm, arg0_opnd, arg0_opnd.into(), not_array_counter); + + // Only handle the same that the array length == ISEQ's lead_num (most common) + let arg0_len_opnd = get_array_len(asm, arg0_opnd); + let lead_num = unsafe { rb_get_iseq_body_param_lead_num(iseq) }; + asm.cmp(arg0_len_opnd, lead_num.into()); + asm.jne(Target::side_exit(Counter::invokeblock_iseq_arg0_wrong_len)); + + let arg0_reg = asm.load(arg0_opnd); + let array_opnd = get_array_ptr(asm, arg0_reg); + asm_comment!(asm, "push splat arg0 onto the stack"); + asm.stack_pop(argc.try_into().unwrap()); + for i in 0..lead_num { + let stack_opnd = asm.stack_push(Type::Unknown); + asm.mov(stack_opnd, Opnd::mem(64, array_opnd, SIZEOF_VALUE_I32 * i)); + } + argc = lead_num; + } - // Next, we're going to loop through every keyword that was - // specified by the caller and make sure that it's in the correct - // place. If it's not we're going to swap it around with another one. - for kwarg_idx in 0..total_kwargs { - let kwarg_idx_isize: isize = kwarg_idx.try_into().unwrap(); - let callee_kwarg = unsafe { *(callee_kwargs.offset(kwarg_idx_isize)) }; + fn nil_fill(comment: &'static str, fill_range: std::ops::Range<i32>, asm: &mut Assembler) { + if fill_range.is_empty() { + return; + } - // If the argument is already in the right order, then we don't - // need to generate any code since the expected value is already - // in the right place on the stack. - if callee_kwarg == caller_kwargs[kwarg_idx] { - continue; - } + asm_comment!(asm, "{}", comment); + for i in fill_range { + let value_slot = asm.ctx.sp_opnd(i); + asm.store(value_slot, Qnil.into()); + } + } - // In this case the argument is not in the right place, so we - // need to find its position where it _should_ be and swap with - // that location. - for swap_idx in (kwarg_idx + 1)..total_kwargs { - if callee_kwarg == caller_kwargs[swap_idx] { - // First we're going to generate the code that is going - // to perform the actual swapping at runtime. - let swap_idx_i32: i32 = swap_idx.try_into().unwrap(); - let kwarg_idx_i32: i32 = kwarg_idx.try_into().unwrap(); - let offset0: u16 = (argc - 1 - swap_idx_i32 - args_before_kw) - .try_into() - .unwrap(); - let offset1: u16 = (argc - 1 - kwarg_idx_i32 - args_before_kw) - .try_into() - .unwrap(); - stack_swap(jit, ctx, asm, offset0, offset1); - - // Next we're going to do some bookkeeping on our end so - // that we know the order that the arguments are - // actually in now. - caller_kwargs.swap(kwarg_idx, swap_idx); + if !forwarding { + // Nil-initialize missing optional parameters + nil_fill( + "nil-initialize missing optionals", + { + let begin = -argc + required_num + opts_filled; + let end = -argc + required_num + opt_num; - break; - } - } + begin..end + }, + asm + ); + // Nil-initialize the block parameter. 
It's the last parameter local + if iseq_has_block_param { + let block_param = asm.ctx.sp_opnd(-argc + num_params - 1); + asm.store(block_param, Qnil.into()); } + // Nil-initialize non-parameter locals + nil_fill( + "nil-initialize locals", + { + let begin = -argc + num_params; + let end = -argc + num_locals; + + begin..end + }, + asm + ); + } - // Keyword arguments cause a special extra local variable to be - // pushed onto the stack that represents the parameters that weren't - // explicitly given a value and have a non-constant default. - let unspec_opnd = VALUE::fixnum_from_usize(unspecified_bits).as_u64(); - asm.mov(ctx.stack_opnd(-1), unspec_opnd.into()); + if forwarding { + assert_eq!(1, num_params); + // Write the CI in to the stack and ensure that it actually gets + // flushed to memory + asm_comment!(asm, "put call info for forwarding"); + let ci_opnd = asm.stack_opnd(-1); + asm.ctx.dealloc_reg(ci_opnd.reg_opnd()); + asm.mov(ci_opnd, VALUE(ci as usize).into()); + + // Nil-initialize other locals which are above the CI + nil_fill("nil-initialize locals", 1..num_locals, asm); } // Points to the receiver operand on the stack unless a captured environment is used let recv = match captured_opnd { Some(captured_opnd) => asm.load(Opnd::mem(64, captured_opnd, 0)), // captured->self - _ => ctx.stack_opnd(argc), + _ => asm.stack_opnd(argc), }; let captured_self = captured_opnd.is_some(); - let sp_offset = (argc as isize) + if captured_self { 0 } else { 1 }; + let sp_offset = argc + if captured_self { 0 } else { 1 }; // Store the updated SP on the current frame (pop arguments and receiver) - asm.comment("store caller sp"); - let caller_sp = asm.lea(ctx.sp_opnd((SIZEOF_VALUE as isize) * -sp_offset)); + asm_comment!(asm, "store caller sp"); + let caller_sp = asm.lea(asm.ctx.sp_opnd(-sp_offset)); asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), caller_sp); // Store the next PC in the current frame jit_save_pc(jit, asm); // Adjust the callee's stack pointer - let offs = - (SIZEOF_VALUE as isize) * (3 + (num_locals as isize) + if doing_kw_call { 1 } else { 0 }); - let callee_sp = asm.lea(ctx.sp_opnd(offs)); + let callee_sp = if forwarding { + let offs = num_locals + VM_ENV_DATA_SIZE as i32; + asm.lea(asm.ctx.sp_opnd(offs)) + } else { + let offs = -argc + num_locals + VM_ENV_DATA_SIZE as i32; + asm.lea(asm.ctx.sp_opnd(offs)) + }; let specval = if let Some(prev_ep) = prev_ep { // We've already side-exited if the callee expects a block, so we @@ -5540,16 +8184,16 @@ fn gen_send_iseq( } else if let Some(captured_opnd) = captured_opnd { let ep_opnd = asm.load(Opnd::mem(64, captured_opnd, SIZEOF_VALUE_I32)); // captured->ep SpecVal::PrevEPOpnd(ep_opnd) - } else if block_arg_type == Some(Type::BlockParamProxy) { - SpecVal::BlockParamProxy - } else if let Some(block_val) = block { - SpecVal::BlockISeq(block_val) + } else if let Some(BlockArg::TProc) = block_arg_type { + SpecVal::BlockHandler(Some(BlockHandler::AlreadySet)) + } else if let Some(BlockArg::BlockParamProxy) = block_arg_type { + SpecVal::BlockHandler(Some(BlockHandler::BlockParamProxy)) } else { - SpecVal::None + SpecVal::BlockHandler(block) }; // Setup the new frame - gen_push_frame(jit, ctx, asm, true, ControlFrame { + perf_call!("gen_send_iseq: ", gen_push_frame(jit, asm, ControlFrame { frame_type, specval, cme, @@ -5557,93 +8201,684 @@ fn gen_send_iseq( sp: callee_sp, iseq: Some(iseq), pc: None, // We are calling into jitted code, which will set the PC as necessary - local_size: num_locals - }); + })); // No need to set cfp->pc since the 
callee sets it whenever calling into routines // that could look at it through jit_save_pc(). // mov(cb, REG0, const_ptr_opnd(start_pc)); // mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), REG0); - // Stub so we can return to JITted code - let return_block = BlockId { - iseq: jit.iseq, - idx: jit_next_insn_idx(jit), - }; + // Create a blockid for the callee + let callee_blockid = BlockId { iseq, idx: start_pc_offset }; // Create a context for the callee let mut callee_ctx = Context::default(); + // If the callee has :inline_block annotation and the callsite has a block ISEQ, + // duplicate a callee block for each block ISEQ to make its `yield` monomorphic. + if let (Some(BlockHandler::BlockISeq(iseq)), true) = (block, builtin_attrs & BUILTIN_ATTR_INLINE_BLOCK != 0) { + callee_ctx.set_inline_block(iseq); + } + // Set the argument types in the callee's context for arg_idx in 0..argc { - let stack_offs: u16 = (argc - arg_idx - 1).try_into().unwrap(); - let arg_type = ctx.get_opnd_type(StackOpnd(stack_offs)); + let stack_offs: u8 = (argc - arg_idx - 1).try_into().unwrap(); + let arg_type = asm.ctx.get_opnd_type(StackOpnd(stack_offs)); callee_ctx.set_local_type(arg_idx.try_into().unwrap(), arg_type); } + // If we're in a forwarding callee, there will be one unknown type + // written in to the local table (the caller's CI object) + if forwarding { + callee_ctx.set_local_type(0, Type::Unknown) + } + + // Set the receiver type in the callee's context let recv_type = if captured_self { Type::Unknown // we don't track the type information of captured->self for now } else { - ctx.get_opnd_type(StackOpnd(argc.try_into().unwrap())) + asm.ctx.get_opnd_type(StackOpnd(argc.try_into().unwrap())) }; callee_ctx.upgrade_opnd_type(SelfOpnd, recv_type); + // Spill or preserve argument registers + if forwarding { + // When forwarding, the callee's local table has only a callinfo, + // so we can't map the actual arguments to the callee's locals. + asm.spill_regs(); + } else { + // Discover stack temp registers that can be used as the callee's locals + let mapped_temps = asm.map_temp_regs_to_args(&mut callee_ctx, argc); + + // Spill stack temps and locals that are not used by the callee. + // This must be done before changing the SP register. + asm.spill_regs_except(&mapped_temps); + + // If the callee block has been compiled before, spill/move registers to reuse the existing block + // for minimizing the number of blocks we need to compile. + if let Some(existing_reg_mapping) = find_most_compatible_reg_mapping(callee_blockid, &callee_ctx) { + asm_comment!(asm, "reuse maps: {:?} -> {:?}", callee_ctx.get_reg_mapping(), existing_reg_mapping); + + // Spill the registers that are not used in the existing block. + // When the same ISEQ is compiled as an entry block, it starts with no registers allocated. + for ®_opnd in callee_ctx.get_reg_mapping().get_reg_opnds().iter() { + if existing_reg_mapping.get_reg(reg_opnd).is_none() { + match reg_opnd { + RegOpnd::Local(local_idx) => { + let spilled_temp = asm.stack_opnd(argc - local_idx as i32 - 1); + asm.spill_reg(spilled_temp); + callee_ctx.dealloc_reg(reg_opnd); + } + RegOpnd::Stack(_) => unreachable!("callee {:?} should have been spilled", reg_opnd), + } + } + } + assert!(callee_ctx.get_reg_mapping().get_reg_opnds().len() <= existing_reg_mapping.get_reg_opnds().len()); + + // Load the registers that are spilled in this block but used in the existing block. + // When there are multiple callsites, some registers spilled in this block may be used at other callsites. 
+ for ®_opnd in existing_reg_mapping.get_reg_opnds().iter() { + if callee_ctx.get_reg_mapping().get_reg(reg_opnd).is_none() { + match reg_opnd { + RegOpnd::Local(local_idx) => { + callee_ctx.alloc_reg(reg_opnd); + let loaded_reg = TEMP_REGS[callee_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()]; + let loaded_temp = asm.stack_opnd(argc - local_idx as i32 - 1); + asm.load_into(Opnd::Reg(loaded_reg), loaded_temp); + } + RegOpnd::Stack(_) => unreachable!("find_most_compatible_reg_mapping should not leave {:?}", reg_opnd), + } + } + } + assert_eq!(callee_ctx.get_reg_mapping().get_reg_opnds().len(), existing_reg_mapping.get_reg_opnds().len()); + + // Shuffle registers to make the register mappings compatible + let mut moves = vec![]; + for ®_opnd in callee_ctx.get_reg_mapping().get_reg_opnds().iter() { + let old_reg = TEMP_REGS[callee_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()]; + let new_reg = TEMP_REGS[existing_reg_mapping.get_reg(reg_opnd).unwrap()]; + moves.push((new_reg, Opnd::Reg(old_reg))); + } + for (reg, opnd) in Assembler::reorder_reg_moves(&moves) { + asm.load_into(Opnd::Reg(reg), opnd); + } + callee_ctx.set_reg_mapping(existing_reg_mapping); + } + } + + // Update SP register for the callee. This must be done after referencing frame.recv, + // which may be SP-relative. + asm.mov(SP, callee_sp); + + // Log the name of the method we're calling to. We intentionally don't do this for inlined ISEQs. + // We also do this after spill_regs() to avoid doubly spilling the same thing on asm.ccall(). + if get_option!(gen_stats) { + // Protect caller-saved registers in case they're used for arguments + let mapping = asm.cpush_all(); + + // Assemble the ISEQ name string + let name_str = get_iseq_name(iseq); + + // Get an index for this ISEQ name + let iseq_idx = get_iseq_idx(&name_str); + + // Increment the counter for this cfunc + asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]); + asm.cpop_all(mapping); + } + // The callee might change locals through Kernel#binding and other means. - ctx.clear_local_types(); + asm.clear_local_types(); - // Pop arguments and receiver in return context, push the return value - // After the return, sp_offset will be 1. The codegen for leave writes - // the return value in case of JIT-to-JIT return. 
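Editorial aside: making the callee's register mapping line up with an already-compiled block boils down to emitting a set of parallel register-to-register moves in an order that never clobbers a source before it is read. The sketch below is one self-contained way to order such moves, assuming a free scratch register for breaking cycles; the actual Assembler::reorder_reg_moves may use a different strategy.

/// Order parallel moves (dst, src) so no source is overwritten before it is read.
/// Cycles are broken by parking one value in `scratch`.
fn order_moves(mut pending: Vec<(u8, u8)>, scratch: u8) -> Vec<(u8, u8)> {
    let mut ordered = Vec::new();
    while !pending.is_empty() {
        // A move is safe when its destination is not read by any other pending move.
        let safe_idx = (0..pending.len()).find(|&i| {
            let dst = pending[i].0;
            pending.iter().enumerate().all(|(j, &(_, src))| j == i || src != dst)
        });
        match safe_idx {
            Some(i) => ordered.push(pending.remove(i)),
            None => {
                // Every destination is still needed as a source: a cycle.
                // Save one source to the scratch register and retry the rewritten move.
                let (dst, src) = pending.remove(0);
                ordered.push((scratch, src));
                pending.push((dst, scratch));
            }
        }
    }
    ordered
}

fn main() {
    // Swap r0 and r1, plus an independent move r3 <- r2, with r15 as scratch.
    let moves = order_moves(vec![(0, 1), (1, 0), (3, 2)], 15);
    println!("{:?}", moves); // [(3, 2), (15, 1), (1, 0), (0, 15)]
}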
- let mut return_ctx = ctx.clone(); - return_ctx.stack_pop(sp_offset.try_into().unwrap()); - return_ctx.stack_push(Type::Unknown); - return_ctx.set_sp_offset(1); - return_ctx.reset_chain_depth(); + // Pop arguments and receiver in return context and + // mark it as a continuation of gen_leave() + let mut return_asm = Assembler::new(jit.num_locals()); + return_asm.ctx = asm.ctx; + return_asm.stack_pop(sp_offset.try_into().unwrap()); + return_asm.ctx.set_sp_offset(0); // We set SP on the caller's frame above + return_asm.ctx.reset_chain_depth_and_defer(); + return_asm.ctx.set_as_return_landing(); + + // Stub so we can return to JITted code + let return_block = BlockId { + iseq: jit.iseq, + idx: jit.next_insn_idx(), + }; // Write the JIT return address on the callee frame - gen_branch( - jit, + jit.gen_branch( asm, - ocb, return_block, - &return_ctx, + &return_asm.ctx, None, None, - gen_return_branch, + BranchGenFn::JITReturn, ); - //print_str(cb, "calling Ruby func:"); - //print_str(cb, rb_id2name(vm_ci_mid(ci))); + // ec->cfp is updated after cfp->jit_return for rb_profile_frames() safety + asm_comment!(asm, "switch to new CFP"); + let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); + asm.mov(CFP, new_cfp); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP); // Directly jump to the entry point of the callee gen_direct_jump( jit, &callee_ctx, - BlockId { - iseq: iseq, - idx: start_pc_offset, - }, + callee_blockid, asm, ); - EndBlock + Some(EndBlock) +} + +// Check if we can handle a keyword call +fn gen_iseq_kw_call_checks( + jit: &JITState, + asm: &mut Assembler, + iseq: *const rb_iseq_t, + kw_arg: *const rb_callinfo_kwarg, + has_kwrest: bool, + caller_kw_num: i32 +) -> Option<()> { + // This struct represents the metadata about the callee-specified + // keyword parameters. + let keyword = unsafe { get_iseq_body_param_keyword(iseq) }; + let keyword_num: usize = unsafe { (*keyword).num }.try_into().unwrap(); + let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap(); + + let mut required_kwargs_filled = 0; + + if keyword_num > 30 || caller_kw_num > 64 { + // We have so many keywords that (1 << num) encoded as a FIXNUM + // (which shifts it left one more) no longer fits inside a 32-bit + // immediate. Similarly, we use a u64 in case of keyword rest parameter. + gen_counter_incr(jit, asm, Counter::send_iseq_too_many_kwargs); + return None; + } + + // Check that the kwargs being passed are valid + if caller_kw_num > 0 { + // This is the list of keyword arguments that the callee specified + // in its initial declaration. + // SAFETY: see compile.c for sizing of this slice. + let callee_kwargs = if keyword_num == 0 { + &[] + } else { + unsafe { slice::from_raw_parts((*keyword).table, keyword_num) } + }; + + // Here we're going to build up a list of the IDs that correspond to + // the caller-specified keyword arguments. If they're not in the + // same order as the order specified in the callee declaration, then + // we're going to need to generate some code to swap values around + // on the stack. 
+ let kw_arg_keyword_len = caller_kw_num as usize; + let mut caller_kwargs: Vec<ID> = vec![0; kw_arg_keyword_len]; + for kwarg_idx in 0..kw_arg_keyword_len { + let sym = unsafe { get_cikw_keywords_idx(kw_arg, kwarg_idx.try_into().unwrap()) }; + caller_kwargs[kwarg_idx] = unsafe { rb_sym2id(sym) }; + } + + // First, we're going to be sure that the names of every + // caller-specified keyword argument correspond to a name in the + // list of callee-specified keyword parameters. + for caller_kwarg in caller_kwargs { + let search_result = callee_kwargs + .iter() + .enumerate() // inject element index + .find(|(_, &kwarg)| kwarg == caller_kwarg); + + match search_result { + None if !has_kwrest => { + // If the keyword was never found, then we know we have a + // mismatch in the names of the keyword arguments, so we need to + // bail. + gen_counter_incr(jit, asm, Counter::send_iseq_kwargs_mismatch); + return None; + } + Some((callee_idx, _)) if callee_idx < keyword_required_num => { + // Keep a count to ensure all required kwargs are specified + required_kwargs_filled += 1; + } + _ => (), + } + } + } + assert!(required_kwargs_filled <= keyword_required_num); + if required_kwargs_filled != keyword_required_num { + gen_counter_incr(jit, asm, Counter::send_iseq_kwargs_mismatch); + return None; + } + + Some(()) +} + +// Codegen for keyword argument handling. Essentially private to gen_send_iseq() since +// there are a lot of preconditions to check before reaching this code. +fn gen_iseq_kw_call( + jit: &mut JITState, + asm: &mut Assembler, + ci_kwarg: *const rb_callinfo_kwarg, + iseq: *const rb_iseq_t, + mut argc: i32, + has_kwrest: bool, +) -> i32 { + let caller_keyword_len_i32: i32 = if ci_kwarg.is_null() { + 0 + } else { + unsafe { get_cikw_keyword_len(ci_kwarg) } + }; + let caller_keyword_len: usize = caller_keyword_len_i32.try_into().unwrap(); + let anon_kwrest = unsafe { rb_get_iseq_flags_anon_kwrest(iseq) && !get_iseq_flags_has_kw(iseq) }; + + // This struct represents the metadata about the callee-specified + // keyword parameters. + let keyword = unsafe { get_iseq_body_param_keyword(iseq) }; + + asm_comment!(asm, "keyword args"); + + // This is the list of keyword arguments that the callee specified + // in its initial declaration. + let callee_kwargs = unsafe { (*keyword).table }; + let callee_kw_count_i32: i32 = unsafe { (*keyword).num }; + let callee_kw_count: usize = callee_kw_count_i32.try_into().unwrap(); + let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap(); + + // Here we're going to build up a list of the IDs that correspond to + // the caller-specified keyword arguments. If they're not in the + // same order as the order specified in the callee declaration, then + // we're going to need to generate some code to swap values around + // on the stack. + let mut kwargs_order: Vec<ID> = vec![0; cmp::max(caller_keyword_len, callee_kw_count)]; + for kwarg_idx in 0..caller_keyword_len { + let sym = unsafe { get_cikw_keywords_idx(ci_kwarg, kwarg_idx.try_into().unwrap()) }; + kwargs_order[kwarg_idx] = unsafe { rb_sym2id(sym) }; + } + + let mut unspecified_bits = 0; + + // The stack_opnd() index to the 0th keyword argument. + let kwargs_stack_base = caller_keyword_len_i32 - 1; + + // Build the keyword rest parameter hash before we make any changes to the order of + // the supplied keyword arguments + let kwrest_type = if has_kwrest { + c_callable! 
{ + fn build_kw_rest(rest_mask: u64, stack_kwargs: *const VALUE, keywords: *const rb_callinfo_kwarg) -> VALUE { + if keywords.is_null() { + return unsafe { rb_hash_new() }; + } + + // Use the total number of supplied keywords as a size upper bound + let keyword_len = unsafe { (*keywords).keyword_len } as usize; + let hash = unsafe { rb_hash_new_with_size(keyword_len as u64) }; + + // Put pairs into the kwrest hash as the mask describes + for kwarg_idx in 0..keyword_len { + if (rest_mask & (1 << kwarg_idx)) != 0 { + unsafe { + let keyword_symbol = (*keywords).keywords.as_ptr().add(kwarg_idx).read(); + let keyword_value = stack_kwargs.add(kwarg_idx).read(); + rb_hash_aset(hash, keyword_symbol, keyword_value); + } + } + } + return hash; + } + } + + asm_comment!(asm, "build kwrest hash"); + + // Make a bit mask describing which keywords should go into kwrest. + let mut rest_mask: u64 = 0; + // Index for one argument that will go into kwrest. + let mut rest_collected_idx = None; + for (supplied_kw_idx, &supplied_kw) in kwargs_order.iter().take(caller_keyword_len).enumerate() { + let mut found = false; + for callee_idx in 0..callee_kw_count { + let callee_kw = unsafe { callee_kwargs.add(callee_idx).read() }; + if callee_kw == supplied_kw { + found = true; + break; + } + } + if !found { + rest_mask |= 1 << supplied_kw_idx; + if rest_collected_idx.is_none() { + rest_collected_idx = Some(supplied_kw_idx as i32); + } + } + } + + let (kwrest, kwrest_type) = if rest_mask == 0 && anon_kwrest { + // In case the kwrest hash should be empty and is anonymous in the callee, + // we can pass nil instead of allocating. Anonymous kwrest can only be + // delegated, and nil is the same as an empty hash when delegating. + (Qnil.into(), Type::Nil) + } else { + // Save PC and SP before allocating + jit_save_pc(jit, asm); + gen_save_sp(asm); + + // Build the kwrest hash. `struct rb_callinfo_kwarg` is malloc'd, so no GC concerns. + let kwargs_start = asm.lea(asm.ctx.sp_opnd(-caller_keyword_len_i32)); + let hash = asm.ccall( + build_kw_rest as _, + vec![rest_mask.into(), kwargs_start, Opnd::const_ptr(ci_kwarg.cast())] + ); + (hash, Type::THash) + }; + + // The kwrest parameter sits after `unspecified_bits` if the callee specifies any + // keywords. + let stack_kwrest_idx = kwargs_stack_base - callee_kw_count_i32 - i32::from(callee_kw_count > 0); + let stack_kwrest = asm.stack_opnd(stack_kwrest_idx); + // If `stack_kwrest` already has another argument there, we need to stow it elsewhere + // first before putting kwrest there. Use `rest_collected_idx` because that value went + // into kwrest so the slot is now free. + let kwrest_idx = callee_kw_count + usize::from(callee_kw_count > 0); + if let (Some(rest_collected_idx), true) = (rest_collected_idx, kwrest_idx < caller_keyword_len) { + let rest_collected = asm.stack_opnd(kwargs_stack_base - rest_collected_idx); + let mapping = asm.ctx.get_opnd_mapping(stack_kwrest.into()); + asm.mov(rest_collected, stack_kwrest); + asm.ctx.set_opnd_mapping(rest_collected.into(), mapping); + // Update our bookkeeping to inform the reordering step later. 
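Editorial aside: the keyword-rest handling here splits the work in two. At compile time it builds a bit mask of the caller keywords the callee does not accept; at run time build_kw_rest copies exactly those entries into the kwrest hash. The sketch below models both halves with u64 symbol IDs and a std HashMap standing in for Ruby symbols and the Hash object; it is an illustration under those stand-ins, not the C-callable helper itself.

use std::collections::HashMap;

/// Compile-time half: bit i is set iff the i-th caller keyword is not a callee keyword.
fn kwrest_mask(caller_kws: &[u64], callee_kws: &[u64]) -> u64 {
    let mut mask = 0u64;
    for (i, kw) in caller_kws.iter().enumerate() {
        if !callee_kws.contains(kw) {
            mask |= 1 << i;
        }
    }
    mask
}

/// Run-time half: copy the masked (keyword, value) pairs into the rest hash,
/// the way build_kw_rest does with rb_hash_aset.
fn build_kw_rest(mask: u64, caller_kws: &[u64], values: &[i64]) -> HashMap<u64, i64> {
    let mut rest = HashMap::with_capacity(caller_kws.len());
    for i in 0..caller_kws.len() {
        if mask & (1 << i) != 0 {
            rest.insert(caller_kws[i], values[i]);
        }
    }
    rest
}

fn main() {
    let caller = [10, 20, 30]; // e.g. foo(a: 1, b: 2, c: 3)
    let callee = [10];         // def foo(a:, **rest)
    let mask = kwrest_mask(&caller, &callee);
    assert_eq!(mask, 0b110);
    let rest = build_kw_rest(mask, &caller, &[1, 2, 3]);
    assert_eq!(rest.get(&20), Some(&2));
    assert_eq!(rest.get(&30), Some(&3));
    assert_eq!(rest.len(), 2);
}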
+ kwargs_order[rest_collected_idx as usize] = kwargs_order[kwrest_idx]; + kwargs_order[kwrest_idx] = 0; + } + // Put kwrest straight into memory, since we might pop it later + asm.ctx.dealloc_reg(stack_kwrest.reg_opnd()); + asm.mov(stack_kwrest, kwrest); + if stack_kwrest_idx >= 0 { + asm.ctx.set_opnd_mapping(stack_kwrest.into(), TempMapping::MapToStack(kwrest_type)); + } + + Some(kwrest_type) + } else { + None + }; + + // Ensure the stack is large enough for the callee + for _ in caller_keyword_len..callee_kw_count { + argc += 1; + asm.stack_push(Type::Unknown); + } + // Now this is the stack_opnd() index to the 0th keyword argument. + let kwargs_stack_base = kwargs_order.len() as i32 - 1; + + // Next, we're going to loop through every keyword that was + // specified by the caller and make sure that it's in the correct + // place. If it's not we're going to swap it around with another one. + for kwarg_idx in 0..callee_kw_count { + let callee_kwarg = unsafe { callee_kwargs.add(kwarg_idx).read() }; + + // If the argument is already in the right order, then we don't + // need to generate any code since the expected value is already + // in the right place on the stack. + if callee_kwarg == kwargs_order[kwarg_idx] { + continue; + } + + // In this case the argument is not in the right place, so we + // need to find its position where it _should_ be and swap with + // that location. + for swap_idx in 0..kwargs_order.len() { + if callee_kwarg == kwargs_order[swap_idx] { + // First we're going to generate the code that is going + // to perform the actual swapping at runtime. + let swap_idx_i32: i32 = swap_idx.try_into().unwrap(); + let kwarg_idx_i32: i32 = kwarg_idx.try_into().unwrap(); + let offset0 = kwargs_stack_base - swap_idx_i32; + let offset1 = kwargs_stack_base - kwarg_idx_i32; + stack_swap(asm, offset0, offset1); + + // Next we're going to do some bookkeeping on our end so + // that we know the order that the arguments are + // actually in now. + kwargs_order.swap(kwarg_idx, swap_idx); + + break; + } + } + } + + // Now that every caller specified kwarg is in the right place, filling + // in unspecified default paramters won't overwrite anything. + for kwarg_idx in keyword_required_num..callee_kw_count { + if kwargs_order[kwarg_idx] != unsafe { callee_kwargs.add(kwarg_idx).read() } { + let default_param_idx = kwarg_idx - keyword_required_num; + let mut default_value = unsafe { (*keyword).default_values.add(default_param_idx).read() }; + + if default_value == Qundef { + // Qundef means that this value is not constant and must be + // recalculated at runtime, so we record it in unspecified_bits + // (Qnil is then used as a placeholder instead of Qundef). + unspecified_bits |= 0x01 << default_param_idx; + default_value = Qnil; + } + + let default_param = asm.stack_opnd(kwargs_stack_base - kwarg_idx as i32); + let param_type = Type::from(default_value); + asm.mov(default_param, default_value.into()); + asm.ctx.set_opnd_mapping(default_param.into(), TempMapping::MapToStack(param_type)); + } + } + + // Pop extra arguments that went into kwrest now that they're at stack top + if has_kwrest && caller_keyword_len > callee_kw_count { + let extra_kwarg_count = caller_keyword_len - callee_kw_count; + asm.stack_pop(extra_kwarg_count); + argc = argc - extra_kwarg_count as i32; + } + + // Keyword arguments cause a special extra local variable to be + // pushed onto the stack that represents the parameters that weren't + // explicitly given a value and have a non-constant default. 
+ if callee_kw_count > 0 { + let unspec_opnd = VALUE::fixnum_from_usize(unspecified_bits).as_u64(); + let top = asm.stack_push(Type::Fixnum); + asm.mov(top, unspec_opnd.into()); + argc += 1; + } + + // The kwrest parameter sits after `unspecified_bits` + if let Some(kwrest_type) = kwrest_type { + let kwrest = asm.stack_push(kwrest_type); + // We put the kwrest parameter in memory earlier + asm.ctx.dealloc_reg(kwrest.reg_opnd()); + argc += 1; + } + + argc +} + +/// This is a helper function to allow us to exit early +/// during code generation if a predicate is true. +/// We return Option<()> here because we will be able to +/// short-circuit using the ? operator if we return None. +/// It would be great if rust let you implement ? for your +/// own types, but as of right now they don't. +fn exit_if(jit: &JITState, asm: &mut Assembler, pred: bool, counter: Counter) -> Option<()> { + if pred { + gen_counter_incr(jit, asm, counter); + return None + } + Some(()) +} + +#[must_use] +fn exit_if_tail_call(jit: &JITState, asm: &mut Assembler, ci: *const rb_callinfo) -> Option<()> { + exit_if(jit, asm, unsafe { vm_ci_flag(ci) } & VM_CALL_TAILCALL != 0, Counter::send_iseq_tailcall) +} + +#[must_use] +fn exit_if_has_post(jit: &JITState, asm: &mut Assembler, iseq: *const rb_iseq_t) -> Option<()> { + exit_if(jit, asm, unsafe { get_iseq_flags_has_post(iseq) }, Counter::send_iseq_has_post) +} + +#[must_use] +fn exit_if_kwsplat_non_nil(jit: &JITState, asm: &mut Assembler, flags: u32, counter: Counter) -> Option<()> { + let kw_splat = flags & VM_CALL_KW_SPLAT != 0; + let kw_splat_stack = StackOpnd((flags & VM_CALL_ARGS_BLOCKARG != 0).into()); + exit_if(jit, asm, kw_splat && asm.ctx.get_opnd_type(kw_splat_stack) != Type::Nil, counter) +} + +#[must_use] +fn exit_if_has_rest_and_captured(jit: &JITState, asm: &mut Assembler, iseq_has_rest: bool, captured_opnd: Option<Opnd>) -> Option<()> { + exit_if(jit, asm, iseq_has_rest && captured_opnd.is_some(), Counter::send_iseq_has_rest_and_captured) +} + +#[must_use] +fn exit_if_has_kwrest_and_captured(jit: &JITState, asm: &mut Assembler, iseq_has_kwrest: bool, captured_opnd: Option<Opnd>) -> Option<()> { + // We need to call a C function to allocate the kwrest hash, but also need to hold the captred + // block across the call, which we can't do. + exit_if(jit, asm, iseq_has_kwrest && captured_opnd.is_some(), Counter::send_iseq_has_kwrest_and_captured) +} + +#[must_use] +fn exit_if_has_rest_and_supplying_kws(jit: &JITState, asm: &mut Assembler, iseq_has_rest: bool, supplying_kws: bool) -> Option<()> { + // There can be a gap between the rest parameter array and the supplied keywords, or + // no space to put the rest array (e.g. `def foo(*arr, k:) = arr; foo(k: 1)` 1 is + // sitting where the rest array should be). + exit_if( + jit, + asm, + iseq_has_rest && supplying_kws, + Counter::send_iseq_has_rest_and_kw_supplied, + ) +} + +#[must_use] +fn exit_if_supplying_kw_and_has_no_kw(jit: &JITState, asm: &mut Assembler, supplying_kws: bool, callee_kws: bool) -> Option<()> { + // Passing keyword arguments to a callee means allocating a hash and treating + // that as a positional argument. Bail for now. + exit_if( + jit, + asm, + supplying_kws && !callee_kws, + Counter::send_iseq_has_no_kw, + ) +} + +#[must_use] +fn exit_if_supplying_kws_and_accept_no_kwargs(jit: &JITState, asm: &mut Assembler, supplying_kws: bool, iseq: *const rb_iseq_t) -> Option<()> { + // If we have a method accepting no kwargs (**nil), exit if we have passed + // it any kwargs. 
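Editorial aside: the keyword shuffling in gen_iseq_kw_call is essentially a selection-style reorder of the caller's keyword slots into the callee's declared order, followed by filling constant defaults and flagging non-constant ones in a bit mask the callee reads. The sketch below reproduces that bookkeeping on plain vectors, with u64 IDs, 0 as the empty-slot marker (like kwargs_order), and None standing in for a Qundef default; it assumes every caller keyword is accepted, which the patch checks earlier in gen_iseq_kw_call_checks.

/// Reorder caller keyword slots into callee order, fill constant defaults for
/// keywords the caller left out, and set a bit for each default that is not a
/// compile-time constant so the callee computes it itself.
fn arrange_kwargs(
    caller_kws: &[u64],
    caller_vals: &[i64],
    callee_kws: &[u64],
    required: usize,
    defaults: &[Option<i64>],
) -> (Vec<i64>, u64) {
    let total = callee_kws.len();
    let mut order = caller_kws.to_vec();
    let mut vals = caller_vals.to_vec();
    order.resize(total, 0); // assumes caller_kws.len() <= total
    vals.resize(total, 0);

    // Swap each slot into the position the callee declared it in (stack_swap in the patch).
    for idx in 0..total {
        if order[idx] != callee_kws[idx] {
            if let Some(swap) = (0..total).find(|&j| order[j] == callee_kws[idx]) {
                order.swap(idx, swap);
                vals.swap(idx, swap);
            }
        }
    }

    // Fill defaults for optional keywords the caller never supplied.
    let mut unspecified_bits = 0u64;
    for idx in required..total {
        if order[idx] != callee_kws[idx] {
            let opt_idx = idx - required;
            match defaults[opt_idx] {
                Some(v) => vals[idx] = v, // constant default, written directly
                None => {
                    // Non-constant default (Qundef in the patch): leave a nil-like
                    // placeholder and flag it for the callee.
                    vals[idx] = 0;
                    unspecified_bits |= 1 << opt_idx;
                }
            }
        }
    }
    (vals, unspecified_bits)
}

fn main() {
    // Callee: def m(a:, b: 1, c: compute_default); IDs a=1, b=2, c=3.
    // Caller: m(b: 20, a: 10). c is left to its non-constant default.
    let (vals, bits) = arrange_kwargs(&[2, 1], &[20, 10], &[1, 2, 3], 1, &[Some(1), None]);
    assert_eq!(vals, vec![10, 20, 0]); // a, b, placeholder for c
    assert_eq!(bits, 0b10);            // bit 1: c must be computed by the callee
}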
+ exit_if( + jit, + asm, + supplying_kws && unsafe { get_iseq_flags_accepts_no_kwarg(iseq) }, + Counter::send_iseq_accepts_no_kwarg + ) +} + +#[must_use] +fn exit_if_doing_kw_and_splat(jit: &JITState, asm: &mut Assembler, doing_kw_call: bool, flags: u32) -> Option<()> { + exit_if(jit, asm, doing_kw_call && flags & VM_CALL_ARGS_SPLAT != 0, Counter::send_iseq_splat_with_kw) +} + +#[must_use] +fn exit_if_wrong_number_arguments( + jit: &JITState, + asm: &mut Assembler, + args_setup_block: bool, + opts_filled: i32, + flags: u32, + opt_num: i32, + iseq_has_rest: bool, +) -> Option<()> { + // Too few arguments and no splat to make up for it + let too_few = opts_filled < 0 && flags & VM_CALL_ARGS_SPLAT == 0; + // Too many arguments and no sink that take them + let too_many = opts_filled > opt_num && !(iseq_has_rest || args_setup_block); + + exit_if(jit, asm, too_few || too_many, Counter::send_iseq_arity_error) +} + +#[must_use] +fn exit_if_doing_kw_and_opts_missing(jit: &JITState, asm: &mut Assembler, doing_kw_call: bool, opts_missing: i32) -> Option<()> { + // If we have unfilled optional arguments and keyword arguments then we + // would need to adjust the arguments location to account for that. + // For now we aren't handling this case. + exit_if(jit, asm, doing_kw_call && opts_missing > 0, Counter::send_iseq_missing_optional_kw) +} + +#[must_use] +fn exit_if_has_rest_and_optional_and_block(jit: &JITState, asm: &mut Assembler, iseq_has_rest: bool, opt_num: i32, iseq: *const rb_iseq_t, block_arg: bool) -> Option<()> { + exit_if( + jit, + asm, + iseq_has_rest && opt_num != 0 && (unsafe { get_iseq_flags_has_block(iseq) } || block_arg), + Counter::send_iseq_has_rest_opt_and_block + ) +} + +#[derive(Clone, Copy)] +enum BlockArg { + Nil, + /// A special sentinel value indicating the block parameter should be read from + /// the current surrounding cfp + BlockParamProxy, + /// A proc object. 
Could be an instance of a subclass of ::rb_cProc + TProc, +} + +#[must_use] +fn exit_if_unsupported_block_arg_type( + jit: &mut JITState, + asm: &mut Assembler, + supplying_block_arg: bool +) -> Option<Option<BlockArg>> { + let block_arg_type = if supplying_block_arg { + asm.ctx.get_opnd_type(StackOpnd(0)) + } else { + // Passing no block argument + return Some(None); + }; + + match block_arg_type { + // We'll handle Nil and BlockParamProxy later + Type::Nil => Some(Some(BlockArg::Nil)), + Type::BlockParamProxy => Some(Some(BlockArg::BlockParamProxy)), + _ if { + let sample_block_arg = jit.peek_at_stack(&asm.ctx, 0); + unsafe { rb_obj_is_proc(sample_block_arg) }.test() + } => { + // Speculate that we'll have a proc as the block arg + Some(Some(BlockArg::TProc)) + } + _ => { + gen_counter_incr(jit, asm, Counter::send_iseq_block_arg_type); + None + } + } +} + +#[must_use] +fn exit_if_stack_too_large(iseq: *const rb_iseq_t) -> Option<()> { + let stack_max = unsafe { rb_get_iseq_body_stack_max(iseq) }; + // Reject ISEQs with very large temp stacks, + // this will allow us to use u8/i8 values to track stack_size and sp_offset + if stack_max >= i8::MAX as u32 { + incr_counter!(iseq_stack_too_large); + return None; + } + Some(()) } fn gen_struct_aref( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, comptime_recv: VALUE, - _comptime_recv_klass: VALUE, flags: u32, argc: i32, -) -> CodegenStatus { +) -> Option<CodegenStatus> { if unsafe { vm_ci_argc(ci) } != 0 { - return CantCompile; + return None; } let off: i32 = unsafe { get_cme_def_body_optimized_index(cme) } @@ -5659,13 +8894,20 @@ fn gen_struct_aref( { let native_off = (off as i64) * (SIZEOF_VALUE as i64); if native_off > (i32::MAX as i64) { - return CantCompile; + return None; } } + if c_method_tracing_currently_enabled(jit) { + // Struct accesses need fire c_call and c_return events, which we can't support + // See :attr-tracing: + gen_counter_incr(jit, asm, Counter::send_cfunc_tracing); + return None; + } + // This is a .send call and we need to adjust the stack if flags & VM_CALL_OPT_SEND != 0 { - handle_opt_send_shift_stack(asm, argc, ctx); + handle_opt_send_shift_stack(asm, argc); } // All structs from the same Struct class should have the same @@ -5674,9 +8916,10 @@ fn gen_struct_aref( // true of the converse. 
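Editorial aside: the exit_if_* helpers above all lean on the same trick: returning Option<()> lets a long chain of preconditions short-circuit with the ? operator while still recording why compilation was abandoned. A minimal standalone sketch of that pattern, with a hypothetical reason list in place of YJIT's Counter enum and gen_counter_incr:

#[derive(Debug)]
enum FallbackReason { TailCall, TooManyArgs }

fn exit_if(pred: bool, reason: FallbackReason, counters: &mut Vec<FallbackReason>) -> Option<()> {
    if pred {
        counters.push(reason); // stand-in for gen_counter_incr
        return None;
    }
    Some(())
}

/// Returns Some(()) only when every precondition holds; the first failing
/// check records why we bailed and aborts the rest via `?`.
fn try_compile(tail_call: bool, argc: usize, counters: &mut Vec<FallbackReason>) -> Option<()> {
    exit_if(tail_call, FallbackReason::TailCall, counters)?;
    exit_if(argc > 30, FallbackReason::TooManyArgs, counters)?;
    Some(())
}

fn main() {
    let mut counters = Vec::new();
    assert!(try_compile(false, 2, &mut counters).is_some());
    assert!(try_compile(true, 2, &mut counters).is_none());
    println!("fallbacks: {:?}", counters); // [TailCall]
}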
let embedded = unsafe { FL_TEST_RAW(comptime_recv, VALUE(RSTRUCT_EMBED_LEN_MASK)) }; - asm.comment("struct aref"); + asm_comment!(asm, "struct aref"); - let recv = asm.load(ctx.stack_pop(1)); + let recv = asm.stack_pop(1); + let recv = asm.load(recv); let val = if embedded != VALUE(0) { Opnd::mem(64, recv, RUBY_OFFSET_RSTRUCT_AS_ARY + (SIZEOF_VALUE_I32 * off)) @@ -5685,32 +8928,41 @@ fn gen_struct_aref( Opnd::mem(64, rstruct_ptr, SIZEOF_VALUE_I32 * off) }; - let ret = ctx.stack_push(Type::Unknown); + let ret = asm.stack_push(Type::Unknown); asm.mov(ret, val); - jump_to_next_insn(jit, ctx, asm, ocb); - EndBlock + jump_to_next_insn(jit, asm) } fn gen_struct_aset( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, comptime_recv: VALUE, - _comptime_recv_klass: VALUE, flags: u32, argc: i32, -) -> CodegenStatus { +) -> Option<CodegenStatus> { if unsafe { vm_ci_argc(ci) } != 1 { - return CantCompile; + return None; + } + + // If the comptime receiver is frozen, writing a struct member will raise an exception + // and we don't want to JIT code to deal with that situation. + if comptime_recv.is_frozen() { + return None; + } + + if c_method_tracing_currently_enabled(jit) { + // Struct accesses need fire c_call and c_return events, which we can't support + // See :attr-tracing: + gen_counter_incr(jit, asm, Counter::send_cfunc_tracing); + return None; } // This is a .send call and we need to adjust the stack if flags & VM_CALL_OPT_SEND != 0 { - handle_opt_send_shift_stack(asm, argc, ctx); + handle_opt_send_shift_stack(asm, argc); } let off: i32 = unsafe { get_cme_def_body_optimized_index(cme) } @@ -5721,28 +8973,78 @@ fn gen_struct_aset( assert!(unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRUCT) }); assert!((off as i64) < unsafe { RSTRUCT_LEN(comptime_recv) }); - asm.comment("struct aset"); + // Even if the comptime recv was not frozen, future recv may be. So we need to emit a guard + // that the recv is not frozen. + // We know all structs are heap objects, so we can check the flag directly. + let recv = asm.stack_opnd(1); + let recv = asm.load(recv); + let flags = asm.load(Opnd::mem(VALUE_BITS, recv, RUBY_OFFSET_RBASIC_FLAGS)); + asm.test(flags, (RUBY_FL_FREEZE as u64).into()); + asm.jnz(Target::side_exit(Counter::opt_aset_frozen)); + + // Not frozen, so we can proceed. - let val = ctx.stack_pop(1); - let recv = ctx.stack_pop(1); + asm_comment!(asm, "struct aset"); + + let val = asm.stack_pop(1); + let recv = asm.stack_pop(1); let val = asm.ccall(RSTRUCT_SET as *const u8, vec![recv, (off as i64).into(), val]); - let ret = ctx.stack_push(Type::Unknown); + let ret = asm.stack_push(Type::Unknown); asm.mov(ret, val); - jump_to_next_insn(jit, ctx, asm, ocb); - EndBlock + jump_to_next_insn(jit, asm) +} + +// Generate code that calls a method with dynamic dispatch +fn gen_send_dynamic<F: Fn(&mut Assembler) -> Opnd>( + jit: &mut JITState, + asm: &mut Assembler, + cd: *const rb_call_data, + sp_pops: usize, + vm_sendish: F, +) -> Option<CodegenStatus> { + // Our frame handling is not compatible with tailcall + if unsafe { vm_ci_flag((*cd).ci) } & VM_CALL_TAILCALL != 0 { + return None; + } + jit_perf_symbol_push!(jit, asm, "gen_send_dynamic", PerfMap::Codegen); + + // Rewind stack_size using ctx.with_stack_size to allow stack_size changes + // before you return None. 
+ asm.ctx = asm.ctx.with_stack_size(jit.stack_size_for_pc); + + // Save PC and SP to prepare for dynamic dispatch + jit_prepare_non_leaf_call(jit, asm); + + // Dispatch a method + let ret = vm_sendish(asm); + + // Pop arguments and a receiver + asm.stack_pop(sp_pops); + + // Push the return value + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, ret); + + // Fix the interpreter SP deviated by vm_sendish + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), SP); + + gen_counter_incr(jit, asm, Counter::num_send_dynamic); + + jit_perf_symbol_pop!(jit, asm, PerfMap::Codegen); + + // End the current block for invalidationg and sharing the same successor + jump_to_next_insn(jit, asm) } fn gen_send_general( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, cd: *const rb_call_data, - block: Option<IseqPtr>, -) -> CodegenStatus { + block: Option<BlockHandler>, +) -> Option<CodegenStatus> { // Relevant definitions: // rb_execution_context_t : vm_core.h // invoker, cfunc logic : method.h, vm_method.c @@ -5758,49 +9060,78 @@ fn gen_send_general( let mut mid = unsafe { vm_ci_mid(ci) }; let mut flags = unsafe { vm_ci_flag(ci) }; - // Don't JIT calls with keyword splat - if flags & VM_CALL_KW_SPLAT != 0 { - gen_counter_incr!(asm, send_kw_splat); - return CantCompile; + // Defer compilation so we can specialize on class of receiver + if !jit.at_compile_target() { + return jit.defer_compilation(asm); } - // Defer compilation so we can specialize on class of receiver - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, asm, ocb); - return EndBlock; + let ci_flags = unsafe { vm_ci_flag(ci) }; + + // Dynamic stack layout. No good way to support without inlining. + if ci_flags & VM_CALL_FORWARDING != 0 { + gen_counter_incr(jit, asm, Counter::send_forwarding); + return None; } let recv_idx = argc + if flags & VM_CALL_ARGS_BLOCKARG != 0 { 1 } else { 0 }; - - let comptime_recv = jit_peek_at_stack(jit, ctx, recv_idx as isize); + let comptime_recv = jit.peek_at_stack(&asm.ctx, recv_idx as isize); let comptime_recv_klass = comptime_recv.class_of(); + assert_eq!(RUBY_T_CLASS, comptime_recv_klass.builtin_type(), + "objects visible to ruby code should have a T_CLASS in their klass field"); - // Guard that the receiver has the same class as the one from compile time - let side_exit = get_side_exit(jit, ocb, ctx); + // Don't compile calls through singleton classes to avoid retaining the receiver. + // Make an exception for class methods since classes tend to be retained anyways. + // Also compile calls on top_self to help tests. 
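Editorial aside: gen_send_dynamic follows one fixed shape no matter which vm_sendish routine the closure wraps: pop the receiver and arguments, let the closure perform the dispatch, then push the single return value. The sketch below is a toy model of that shape over a plain Vec, with the closure parameter standing in for the vm_sendish argument and none of the PC/SP bookkeeping.

/// Toy value stack; i64 stands in for VALUE.
struct ToyStack { slots: Vec<i64> }

impl ToyStack {
    /// Pop `sp_pops` operands (receiver plus arguments), let `dispatch` compute the
    /// result from them, then push that result.
    fn send_dynamic<F: Fn(&[i64]) -> i64>(&mut self, sp_pops: usize, dispatch: F) -> i64 {
        let base = self.slots.len() - sp_pops;
        let ret = dispatch(&self.slots[base..]);
        self.slots.truncate(base); // pop arguments and the receiver
        self.slots.push(ret);      // push the return value
        ret
    }
}

fn main() {
    let mut stack = ToyStack { slots: vec![7, 40, 2] }; // receiver 7, args 40 and 2
    // "Dispatch" that adds the two arguments and ignores the receiver.
    let ret = stack.send_dynamic(3, |operands| operands[1] + operands[2]);
    assert_eq!(ret, 42);
    assert_eq!(stack.slots, vec![42]); // operands replaced by the return value
}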
+ if VALUE(0) != unsafe { FL_TEST(comptime_recv_klass, VALUE(RUBY_FL_SINGLETON as usize)) } + && comptime_recv != unsafe { rb_vm_top_self() } + && !unsafe { RB_TYPE_P(comptime_recv, RUBY_T_CLASS) } + && !unsafe { RB_TYPE_P(comptime_recv, RUBY_T_MODULE) } { + gen_counter_incr(jit, asm, Counter::send_singleton_class); + return None; + } // Points to the receiver operand on the stack - let recv = ctx.stack_opnd(recv_idx); - let recv_opnd = StackOpnd(recv_idx.try_into().unwrap()); - jit_guard_known_klass( + let recv = asm.stack_opnd(recv_idx); + let recv_opnd: YARVOpnd = recv.into(); + + // Log the name of the method we're calling to + asm_comment!(asm, "call to {}", get_method_name(Some(comptime_recv_klass), mid)); + + // Gather some statistics about sends + gen_counter_incr(jit, asm, Counter::num_send); + if let Some(_known_klass) = asm.ctx.get_opnd_type(recv_opnd).known_class() { + gen_counter_incr(jit, asm, Counter::num_send_known_class); + } + if asm.ctx.get_chain_depth() > 1 { + gen_counter_incr(jit, asm, Counter::num_send_polymorphic); + } + // If megamorphic, let the caller fallback to dynamic dispatch + if asm.ctx.get_chain_depth() >= SEND_MAX_DEPTH { + gen_counter_incr(jit, asm, Counter::send_megamorphic); + return None; + } + + perf_call!("gen_send_general: ", jit_guard_known_klass( jit, - ctx, asm, - ocb, - comptime_recv_klass, recv, recv_opnd, comptime_recv, SEND_MAX_DEPTH, - side_exit, - ); + Counter::guard_send_klass_megamorphic, + )); // Do method lookup let mut cme = unsafe { rb_callable_method_entry(comptime_recv_klass, mid) }; if cme.is_null() { - // TODO: counter - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_cme_not_found); + return None; } + // Load an overloaded cme if applicable. See vm_search_cc(). + // It allows you to use a faster ISEQ if possible. + cme = unsafe { rb_check_overloaded_cme(cme, ci) }; + let visi = unsafe { METHOD_ENTRY_VISI(cme) }; match visi { METHOD_VISI_PUBLIC => { @@ -5810,15 +9141,16 @@ fn gen_send_general( if flags & VM_CALL_FCALL == 0 { // Can only call private methods with FCALL callsites. // (at the moment they are callsites without a receiver or an explicit `self` receiver) - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_private_not_fcall); + return None; } } METHOD_VISI_PROTECTED => { // If the method call is an FCALL, it is always valid if flags & VM_CALL_FCALL == 0 { - // otherwise we need an ancestry check to ensure the receiver is vaild to be called + // otherwise we need an ancestry check to ensure the receiver is valid to be called // as protected - jit_protected_callee_ancestry_guard(jit, asm, ocb, cme, side_exit); + jit_protected_callee_ancestry_guard(asm, cme); } } _ => { @@ -5828,7 +9160,7 @@ fn gen_send_general( // Register block for invalidation //assert!(cme->called_id == mid); - assume_method_lookup_stable(jit, ocb, cme); + jit.assume_method_lookup_stable(asm, cme); // To handle the aliased method case (VM_METHOD_TYPE_ALIAS) loop { @@ -5838,38 +9170,58 @@ fn gen_send_general( VM_METHOD_TYPE_ISEQ => { let iseq = unsafe { get_def_iseq_ptr((*cme).def) }; let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL; - return gen_send_iseq(jit, ctx, asm, ocb, iseq, ci, frame_type, None, cme, block, flags, argc, None); + return perf_call! { gen_send_iseq(jit, asm, iseq, ci, frame_type, None, cme, block, flags, argc, None) }; } VM_METHOD_TYPE_CFUNC => { - return gen_send_cfunc( + return perf_call! 
{ gen_send_cfunc( jit, - ctx, asm, - ocb, ci, cme, block, - &comptime_recv_klass, + Some(comptime_recv_klass), flags, argc, - ); + ) }; } VM_METHOD_TYPE_IVAR => { - if flags & VM_CALL_ARGS_SPLAT != 0 { - gen_counter_incr!(asm, send_args_splat_ivar); - return CantCompile; + // This is a .send call not supported right now for attr_reader + if flags & VM_CALL_OPT_SEND != 0 { + gen_counter_incr(jit, asm, Counter::send_send_attr_reader); + return None; } - if argc != 0 { - // Argument count mismatch. Getters take no arguments. - gen_counter_incr!(asm, send_getter_arity); - return CantCompile; + if flags & VM_CALL_ARGS_BLOCKARG != 0 { + match asm.ctx.get_opnd_type(StackOpnd(0)) { + Type::Nil | Type::BlockParamProxy => { + // Getters ignore the block arg, and these types of block args can be + // passed without side-effect (never any `to_proc` call). + asm.stack_pop(1); + } + _ => { + gen_counter_incr(jit, asm, Counter::send_getter_block_arg); + return None; + } + } } - // This is a .send call not supported right now for getters - if flags & VM_CALL_OPT_SEND != 0 { - gen_counter_incr!(asm, send_send_getter); - return CantCompile; + if argc != 0 { + // Guard for simple splat of empty array + if VM_CALL_ARGS_SPLAT == flags & (VM_CALL_ARGS_SPLAT | VM_CALL_KWARG | VM_CALL_KW_SPLAT) + && argc == 1 { + // Not using chain guards since on failure these likely end up just raising + // ArgumentError + let splat = asm.stack_opnd(0); + guard_object_is_array(asm, splat, splat.into(), Counter::guard_send_getter_splat_non_empty); + let splat_len = get_array_len(asm, splat); + asm.cmp(splat_len, 0.into()); + asm.jne(Target::side_exit(Counter::guard_send_getter_splat_non_empty)); + asm.stack_pop(1); + } else { + // Argument count mismatch. Getters take no arguments. + gen_counter_incr(jit, asm, Counter::send_getter_arity); + return None; + } } if c_method_tracing_currently_enabled(jit) { @@ -5878,198 +9230,134 @@ fn gen_send_general( // Handling the C method tracing events for attr_accessor // methods is easier than regular C methods as we know the // "method" we are calling into never enables those tracing - // events. Once global invalidation runs, the code for the - // attr_accessor is invalidated and we exit at the closest - // instruction boundary which is always outside of the body of - // the attr_accessor code. - gen_counter_incr!(asm, send_cfunc_tracing); - return CantCompile; + // events. We are never inside the code that needs to be + // invalidated when invalidation happens. 
+ gen_counter_incr(jit, asm, Counter::send_cfunc_tracing); + return None; } + let recv = asm.stack_opnd(0); // the receiver should now be the stack top let ivar_name = unsafe { get_cme_def_body_attr_id(cme) }; - if flags & VM_CALL_ARGS_BLOCKARG != 0 { - gen_counter_incr!(asm, send_block_arg); - return CantCompile; - } - return gen_get_ivar( jit, - ctx, asm, - ocb, SEND_MAX_DEPTH, comptime_recv, ivar_name, recv, - recv_opnd, - side_exit, + recv.into(), ); } VM_METHOD_TYPE_ATTRSET => { + // This is a .send call not supported right now for attr_writer + if flags & VM_CALL_OPT_SEND != 0 { + gen_counter_incr(jit, asm, Counter::send_send_attr_writer); + return None; + } if flags & VM_CALL_ARGS_SPLAT != 0 { - gen_counter_incr!(asm, send_args_splat_attrset); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_args_splat_attrset); + return None; } if flags & VM_CALL_KWARG != 0 { - gen_counter_incr!(asm, send_attrset_kwargs); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_attrset_kwargs); + return None; } else if argc != 1 || unsafe { !RB_TYPE_P(comptime_recv, RUBY_T_OBJECT) } { - gen_counter_incr!(asm, send_ivar_set_method); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_ivar_set_method); + return None; } else if c_method_tracing_currently_enabled(jit) { // Can't generate code for firing c_call and c_return events // See :attr-tracing: - gen_counter_incr!(asm, send_cfunc_tracing); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_cfunc_tracing); + return None; } else if flags & VM_CALL_ARGS_BLOCKARG != 0 { - gen_counter_incr!(asm, send_block_arg); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_attrset_block_arg); + return None; } else { let ivar_name = unsafe { get_cme_def_body_attr_id(cme) }; - return gen_set_ivar(jit, ctx, asm, comptime_recv, ivar_name, flags, argc); + return gen_set_ivar(jit, asm, comptime_recv, ivar_name, StackOpnd(1), None); } } // Block method, e.g. define_method(:foo) { :my_block } VM_METHOD_TYPE_BMETHOD => { if flags & VM_CALL_ARGS_SPLAT != 0 { - gen_counter_incr!(asm, send_args_splat_bmethod); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_args_splat_bmethod); + return None; } - return gen_send_bmethod(jit, ctx, asm, ocb, ci, cme, block, flags, argc); - } - VM_METHOD_TYPE_ZSUPER => { - gen_counter_incr!(asm, send_zsuper_method); - return CantCompile; + return gen_send_bmethod(jit, asm, ci, cme, block, flags, argc); } VM_METHOD_TYPE_ALIAS => { // Retrieve the aliased method and re-enter the switch cme = unsafe { rb_aliased_callable_method_entry(cme) }; continue; } - VM_METHOD_TYPE_UNDEF => { - gen_counter_incr!(asm, send_undef_method); - return CantCompile; - } - VM_METHOD_TYPE_NOTIMPLEMENTED => { - gen_counter_incr!(asm, send_not_implemented_method); - return CantCompile; - } // Send family of methods, e.g. call/apply VM_METHOD_TYPE_OPTIMIZED => { if flags & VM_CALL_ARGS_BLOCKARG != 0 { - gen_counter_incr!(asm, send_block_arg); - return CantCompile; - } - - if flags & VM_CALL_ARGS_SPLAT != 0 { - gen_counter_incr!(asm, send_args_splat_optimized); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_optimized_block_arg); + return None; } let opt_type = unsafe { get_cme_def_body_optimized_type(cme) }; match opt_type { OPTIMIZED_METHOD_TYPE_SEND => { - // This is for method calls like `foo.send(:bar)` // The `send` method does not get its own stack frame. 
// instead we look up the method and call it, // doing some stack shifting based on the VM_CALL_OPT_SEND flag - let starting_context = ctx.clone(); + // Reject nested cases such as `send(:send, :alias_for_send, :foo))`. + // We would need to do some stack manipulation here or keep track of how + // many levels deep we need to stack manipulate. Because of how exits + // currently work, we can't do stack manipulation until we will no longer + // side exit. + if flags & VM_CALL_OPT_SEND != 0 { + gen_counter_incr(jit, asm, Counter::send_send_nested); + return None; + } if argc == 0 { - gen_counter_incr!(asm, send_send_wrong_args); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_send_wrong_args); + return None; } argc -= 1; - let compile_time_name = jit_peek_at_stack(jit, ctx, argc as isize); - - if !compile_time_name.string_p() && !compile_time_name.static_sym_p() { - gen_counter_incr!(asm, send_send_chain_not_string_or_sym); - return CantCompile; - } + let compile_time_name = jit.peek_at_stack(&asm.ctx, argc as isize); mid = unsafe { rb_get_symbol_id(compile_time_name) }; if mid == 0 { - gen_counter_incr!(asm, send_send_null_mid); - return CantCompile; + // This also rejects method names that need conversion + gen_counter_incr(jit, asm, Counter::send_send_null_mid); + return None; } cme = unsafe { rb_callable_method_entry(comptime_recv_klass, mid) }; if cme.is_null() { - gen_counter_incr!(asm, send_send_null_cme); - return CantCompile; - } - - // We aren't going to handle `send(send(:foo))`. We would need to - // do some stack manipulation here or keep track of how many levels - // deep we need to stack manipulate - // Because of how exits currently work, we can't do stack manipulation - // until we will no longer side exit. - let def_type = unsafe { get_cme_def_type(cme) }; - if let VM_METHOD_TYPE_OPTIMIZED = def_type { - let opt_type = unsafe { get_cme_def_body_optimized_type(cme) }; - if let OPTIMIZED_METHOD_TYPE_SEND = opt_type { - gen_counter_incr!(asm, send_send_nested); - return CantCompile; - } + gen_counter_incr(jit, asm, Counter::send_send_null_cme); + return None; } flags |= VM_CALL_FCALL | VM_CALL_OPT_SEND; - assume_method_lookup_stable(jit, ocb, cme); - - let (known_class, type_mismatch_exit) = { - if compile_time_name.string_p() { - ( - unsafe { rb_cString }, - counted_exit!(ocb, side_exit, send_send_chain_not_string), + jit.assume_method_lookup_stable(asm, cme); - ) - } else { - ( - unsafe { rb_cSymbol }, - counted_exit!(ocb, side_exit, send_send_chain_not_sym), - ) - } - }; - - jit_guard_known_klass( - jit, - ctx, + asm_comment!( asm, - ocb, - known_class, - ctx.stack_opnd(argc), - StackOpnd(argc as u16), - compile_time_name, - 2, // We have string or symbol, so max depth is 2 - type_mismatch_exit + "guard sending method name \'{}\'", + unsafe { cstr_to_rust_string(rb_id2name(mid)) }.unwrap_or_else(|| "<unknown>".to_owned()), ); - // Need to do this here so we don't have too many live - // values for the register allocator. 
- let name_opnd = asm.load(ctx.stack_opnd(argc)); - + let name_opnd = asm.stack_opnd(argc); let symbol_id_opnd = asm.ccall(rb_get_symbol_id as *const u8, vec![name_opnd]); - asm.comment("chain_guard_send"); - let chain_exit = counted_exit!(ocb, side_exit, send_send_chain); - asm.cmp(symbol_id_opnd, 0.into()); - asm.jbe(chain_exit.into()); - asm.cmp(symbol_id_opnd, mid.into()); jit_chain_guard( JCC_JNE, jit, - &starting_context, asm, - ocb, - SEND_MAX_CHAIN_DEPTH, - chain_exit, + SEND_MAX_DEPTH, + Counter::guard_send_send_name_chain, ); // We have changed the argc, flags, mid, and cme, so we need to re-enter the match @@ -6078,36 +9366,33 @@ fn gen_send_general( } OPTIMIZED_METHOD_TYPE_CALL => { - if block.is_some() { - gen_counter_incr!(asm, send_call_block); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_call_block); + return None; } if flags & VM_CALL_KWARG != 0 { - gen_counter_incr!(asm, send_call_kwarg); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_call_kwarg); + return None; } - // Optimize for single ractor mode and avoid runtime check for - // "defined with an un-shareable Proc in a different Ractor" - if !assume_single_ractor_mode(jit, ocb) { - gen_counter_incr!(asm, send_call_multi_ractor); - return CantCompile; + if flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(jit, asm, Counter::send_args_splat_opt_call); + return None; } // If this is a .send call we need to adjust the stack if flags & VM_CALL_OPT_SEND != 0 { - handle_opt_send_shift_stack(asm, argc, ctx); + handle_opt_send_shift_stack(asm, argc); } // About to reset the SP, need to load this here let recv_load = asm.load(recv); - let sp = asm.lea(ctx.sp_opnd(0)); + let sp = asm.lea(asm.ctx.sp_opnd(0)); // Save the PC and SP because the callee can make Ruby calls - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_non_leaf_call(jit, asm); let kw_splat = flags & VM_CALL_KW_SPLAT; let stack_argument_pointer = asm.lea(Opnd::mem(64, sp, -(argc) * SIZEOF_VALUE_I32)); @@ -6121,49 +9406,44 @@ fn gen_send_general( VM_BLOCK_HANDLER_NONE.into(), ]); - ctx.stack_pop(argc as usize + 1); + asm.stack_pop(argc as usize + 1); - let stack_ret = ctx.stack_push(Type::Unknown); + let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, ret); - return KeepCompiling; + // End the block to allow invalidating the next instruction + return jump_to_next_insn(jit, asm); } OPTIMIZED_METHOD_TYPE_BLOCK_CALL => { - gen_counter_incr!(asm, send_optimized_method_block_call); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_optimized_method_block_call); + return None; } OPTIMIZED_METHOD_TYPE_STRUCT_AREF => { if flags & VM_CALL_ARGS_SPLAT != 0 { - gen_counter_incr!(asm, send_args_splat_aref); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_args_splat_aref); + return None; } return gen_struct_aref( jit, - ctx, asm, - ocb, ci, cme, comptime_recv, - comptime_recv_klass, flags, argc, ); } OPTIMIZED_METHOD_TYPE_STRUCT_ASET => { if flags & VM_CALL_ARGS_SPLAT != 0 { - gen_counter_incr!(asm, send_args_splat_aset); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_args_splat_aset); + return None; } return gen_struct_aset( jit, - ctx, asm, - ocb, ci, cme, comptime_recv, - comptime_recv_klass, flags, argc, ); @@ -6173,13 +9453,25 @@ fn gen_send_general( } } } + VM_METHOD_TYPE_ZSUPER => { + gen_counter_incr(jit, asm, Counter::send_zsuper_method); + return None; + } + VM_METHOD_TYPE_UNDEF => { + gen_counter_incr(jit, asm, Counter::send_undef_method); + return None; + } + 
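// A standalone model of the stack shift performed for `.send` above (see
// handle_opt_send_shift_stack): the method name sits between the receiver and
// the arguments and is squeezed out so the remaining layout matches an ordinary
// call. Plain i64 values stand in for VALUEs; this is illustrative only.
fn shift_out_send_name(stack: &mut Vec<i64>, argc: usize) {
    // Stack layout (top = end of the Vec): .., recv, name, arg1, .., argN
    // Copy each argument one slot toward the receiver, overwriting the name...
    let top = stack.len();
    for j in (0..argc).rev() {
        // index `top - 1 - j` models stack_opnd(j), the value at depth j from the top
        stack[top - 1 - (j + 1)] = stack[top - 1 - j];
    }
    // ...then drop the now-duplicated top slot, which is the net effect of
    // ctx.shift_stack()/asm.shift_stack() in this model.
    stack.truncate(top - 1);
}

fn main() {
    // recv = 10, name = 99 (the :sym), args = 1, 2, 3
    let mut stack = vec![10, 99, 1, 2, 3];
    shift_out_send_name(&mut stack, 3);
    assert_eq!(stack, vec![10, 1, 2, 3]);
}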
VM_METHOD_TYPE_NOTIMPLEMENTED => { + gen_counter_incr(jit, asm, Counter::send_not_implemented_method); + return None; + } VM_METHOD_TYPE_MISSING => { - gen_counter_incr!(asm, send_missing_method); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_missing_method); + return None; } VM_METHOD_TYPE_REFINED => { - gen_counter_incr!(asm, send_refined_method); - return CantCompile; + gen_counter_incr(jit, asm, Counter::send_refined_method); + return None; } _ => { unreachable!(); @@ -6188,6 +9480,35 @@ fn gen_send_general( } } +/// Get class name from a class pointer. +fn get_class_name(class: Option<VALUE>) -> String { + class.filter(|&class| { + // type checks for rb_class2name() + unsafe { RB_TYPE_P(class, RUBY_T_MODULE) || RB_TYPE_P(class, RUBY_T_CLASS) } + }).and_then(|class| unsafe { + cstr_to_rust_string(rb_class2name(class)) + }).unwrap_or_else(|| "Unknown".to_string()) +} + +/// Assemble "{class_name}#{method_name}" from a class pointer and a method ID +fn get_method_name(class: Option<VALUE>, mid: u64) -> String { + let class_name = get_class_name(class); + let method_name = if mid != 0 { + unsafe { cstr_to_rust_string(rb_id2name(mid)) } + } else { + None + }.unwrap_or_else(|| "Unknown".to_string()); + format!("{}#{}", class_name, method_name) +} + +/// Assemble "{label}@{iseq_path}:{lineno}" (iseq_inspect() format) from an ISEQ +fn get_iseq_name(iseq: IseqPtr) -> String { + let c_string = unsafe { rb_yjit_iseq_inspect(iseq) }; + let string = unsafe { CStr::from_ptr(c_string) }.to_str() + .unwrap_or_else(|_| "not UTF-8").to_string(); + unsafe { ruby_xfree(c_string as *mut c_void); } + string +} /// Shifts the stack for send in order to remove the name of the method /// Comment below borrow from vm_call_opt_send in vm_insnhelper.c @@ -6204,164 +9525,324 @@ fn gen_send_general( ///--+------+--------+------+------ /// /// We do this for our compiletime context and the actual stack -fn handle_opt_send_shift_stack(asm: &mut Assembler, argc: i32, ctx: &mut Context) { - asm.comment("shift_stack"); +fn handle_opt_send_shift_stack(asm: &mut Assembler, argc: i32) { + asm_comment!(asm, "shift_stack"); for j in (0..argc).rev() { - let opnd = ctx.stack_opnd(j); - let opnd2 = ctx.stack_opnd(j + 1); + let opnd = asm.stack_opnd(j); + let opnd2 = asm.stack_opnd(j + 1); asm.mov(opnd2, opnd); } - ctx.shift_stack(argc as usize); + asm.shift_stack(argc as usize); } fn gen_opt_send_without_block( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - let cd = jit_get_arg(jit, 0).as_ptr(); +) -> Option<CodegenStatus> { + // Generate specialized code if possible + let cd = jit.get_arg(0).as_ptr(); + if let Some(status) = perf_call! 
{ gen_send_general(jit, asm, cd, None) } { + return Some(status); + } - gen_send_general(jit, ctx, asm, ocb, cd, None) + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of send + gen_send_dynamic(jit, asm, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| { + extern "C" { + fn rb_vm_opt_send_without_block(ec: EcPtr, cfp: CfpPtr, cd: VALUE) -> VALUE; + } + asm.ccall( + rb_vm_opt_send_without_block as *const u8, + vec![EC, CFP, (cd as usize).into()], + ) + }) } fn gen_send( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - let cd = jit_get_arg(jit, 0).as_ptr(); - let block = jit_get_arg(jit, 1).as_optional_ptr(); - return gen_send_general(jit, ctx, asm, ocb, cd, block); +) -> Option<CodegenStatus> { + // Generate specialized code if possible + let cd = jit.get_arg(0).as_ptr(); + let block = jit.get_arg(1).as_optional_ptr().map(|iseq| BlockHandler::BlockISeq(iseq)); + if let Some(status) = perf_call! { gen_send_general(jit, asm, cd, block) } { + return Some(status); + } + + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of send + let blockiseq = jit.get_arg(1).as_iseq(); + gen_send_dynamic(jit, asm, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| { + extern "C" { + fn rb_vm_send(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; + } + asm.ccall( + rb_vm_send as *const u8, + vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()], + ) + }) +} + +fn gen_sendforward( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + // Generate specialized code if possible + let cd = jit.get_arg(0).as_ptr(); + let block = jit.get_arg(1).as_optional_ptr().map(|iseq| BlockHandler::BlockISeq(iseq)); + if let Some(status) = perf_call! 
{ gen_send_general(jit, asm, cd, block) } { + return Some(status); + } + + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of sendforward + let blockiseq = jit.get_arg(1).as_iseq(); + gen_send_dynamic(jit, asm, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| { + extern "C" { + fn rb_vm_sendforward(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; + } + asm.ccall( + rb_vm_sendforward as *const u8, + vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()], + ) + }) } fn gen_invokeblock( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, asm, ocb); - return EndBlock; +) -> Option<CodegenStatus> { + // Generate specialized code if possible + let cd = jit.get_arg(0).as_ptr(); + if let Some(status) = gen_invokeblock_specialized(jit, asm, cd) { + return Some(status); + } + + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of send + gen_send_dynamic(jit, asm, cd, unsafe { rb_yjit_invokeblock_sp_pops((*cd).ci) }, |asm| { + extern "C" { + fn rb_vm_invokeblock(ec: EcPtr, cfp: CfpPtr, cd: VALUE) -> VALUE; + } + asm.ccall( + rb_vm_invokeblock as *const u8, + vec![EC, CFP, (cd as usize).into()], + ) + }) +} + +fn gen_invokeblock_specialized( + jit: &mut JITState, + asm: &mut Assembler, + cd: *const rb_call_data, +) -> Option<CodegenStatus> { + if !jit.at_compile_target() { + return jit.defer_compilation(asm); + } + + // Fallback to dynamic dispatch if this callsite is megamorphic + if asm.ctx.get_chain_depth() >= SEND_MAX_DEPTH { + gen_counter_incr(jit, asm, Counter::invokeblock_megamorphic); + return None; } // Get call info - let cd = jit_get_arg(jit, 0).as_ptr(); let ci = unsafe { get_call_data_ci(cd) }; let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap(); let flags = unsafe { vm_ci_flag(ci) }; // Get block_handler - let cfp = unsafe { get_ec_cfp(jit.ec.unwrap()) }; + let cfp = jit.get_cfp(); let lep = unsafe { rb_vm_ep_local_ep(get_cfp_ep(cfp)) }; let comptime_handler = unsafe { *lep.offset(VM_ENV_DATA_INDEX_SPECVAL.try_into().unwrap()) }; // Handle each block_handler type if comptime_handler.0 == VM_BLOCK_HANDLER_NONE as usize { // no block given - gen_counter_incr!(asm, invokeblock_none); - CantCompile + gen_counter_incr(jit, asm, Counter::invokeblock_none); + None } else if comptime_handler.0 & 0x3 == 0x1 { // VM_BH_ISEQ_BLOCK_P - asm.comment("get local EP"); + asm_comment!(asm, "get local EP"); let ep_opnd = gen_get_lep(jit, asm); let block_handler_opnd = asm.load( Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) ); - asm.comment("guard block_handler type"); - let side_exit = get_side_exit(jit, ocb, ctx); + asm_comment!(asm, "guard block_handler type"); let tag_opnd = asm.and(block_handler_opnd, 0x3.into()); // block_handler is a tagged pointer asm.cmp(tag_opnd, 0x1.into()); // VM_BH_ISEQ_BLOCK_P - asm.jne(counted_exit!(ocb, side_exit, invokeblock_iseq_tag_changed)); + jit_chain_guard( + JCC_JNE, + jit, + asm, + SEND_MAX_DEPTH, + Counter::guard_invokeblock_tag_changed, + ); + + // If the current ISEQ is annotated to be inlined but it's not being inlined here, + // generate a dynamic dispatch to avoid making this yield megamorphic. 
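// A rough standalone model of the "chain depth" cutoff used above: each failed
// guard adds one more specialized version for a call site, and once the chain
// reaches a maximum depth the site is treated as megamorphic and falls back to a
// single generic path. YJIT chains guarded block versions rather than keeping a
// map, and MAX_DEPTH and the shape key below are illustrative, but the depth
// cutoff plays the same role.
use std::collections::HashMap;

const MAX_DEPTH: usize = 5; // stand-in for SEND_MAX_DEPTH

#[derive(Default)]
struct CallSite {
    // One entry per specialized "chain link", keyed by the observed receiver shape.
    specialized: HashMap<u64, &'static str>,
    megamorphic: bool,
}

impl CallSite {
    fn dispatch(&mut self, receiver_shape: u64) -> &'static str {
        if self.megamorphic {
            return "generic dispatch";
        }
        if let Some(&target) = self.specialized.get(&receiver_shape) {
            return target; // guard hit: fast path
        }
        if self.specialized.len() >= MAX_DEPTH {
            // Too many shapes seen at this site: stop adding guards.
            self.megamorphic = true;
            return "generic dispatch";
        }
        // Compile one more specialized version for this shape.
        self.specialized.insert(receiver_shape, "specialized code");
        "specialized code"
    }
}

fn main() {
    let mut site = CallSite::default();
    for shape in 0..10u64 {
        site.dispatch(shape);
    }
    assert!(site.megamorphic);
}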
+ if unsafe { rb_jit_iseq_builtin_attrs(jit.iseq) } & BUILTIN_ATTR_INLINE_BLOCK != 0 && !asm.ctx.inline() { + gen_counter_incr(jit, asm, Counter::invokeblock_iseq_not_inlined); + return None; + } - // Not supporting vm_callee_setup_block_arg_arg0_splat for now let comptime_captured = unsafe { ((comptime_handler.0 & !0x3) as *const rb_captured_block).as_ref().unwrap() }; let comptime_iseq = unsafe { *comptime_captured.code.iseq.as_ref() }; - if argc == 1 && unsafe { get_iseq_flags_has_lead(comptime_iseq) && !get_iseq_flags_ambiguous_param0(comptime_iseq) } { - gen_counter_incr!(asm, invokeblock_iseq_arg0_splat); - return CantCompile; - } - asm.comment("guard known ISEQ"); + asm_comment!(asm, "guard known ISEQ"); let captured_opnd = asm.and(block_handler_opnd, Opnd::Imm(!0x3)); let iseq_opnd = asm.load(Opnd::mem(64, captured_opnd, SIZEOF_VALUE_I32 * 2)); - asm.cmp(iseq_opnd, (comptime_iseq as usize).into()); - let block_changed_exit = counted_exit!(ocb, side_exit, invokeblock_iseq_block_changed); + asm.cmp(iseq_opnd, VALUE::from(comptime_iseq).into()); jit_chain_guard( JCC_JNE, jit, - ctx, asm, - ocb, - SEND_MAX_CHAIN_DEPTH, - block_changed_exit, + SEND_MAX_DEPTH, + Counter::guard_invokeblock_iseq_block_changed, ); - gen_send_iseq( + perf_call! { gen_send_iseq(jit, asm, comptime_iseq, ci, VM_FRAME_MAGIC_BLOCK, None, 0 as _, None, flags, argc, Some(captured_opnd)) } + } else if comptime_handler.0 & 0x3 == 0x3 { // VM_BH_IFUNC_P + // We aren't handling CALLER_SETUP_ARG and CALLER_REMOVE_EMPTY_KW_SPLAT yet. + if flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(jit, asm, Counter::invokeblock_ifunc_args_splat); + return None; + } + if flags & VM_CALL_KW_SPLAT != 0 { + gen_counter_incr(jit, asm, Counter::invokeblock_ifunc_kw_splat); + return None; + } + + asm_comment!(asm, "get local EP"); + let ep_opnd = gen_get_lep(jit, asm); + let block_handler_opnd = asm.load( + Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) + ); + + asm_comment!(asm, "guard block_handler type"); + let tag_opnd = asm.and(block_handler_opnd, 0x3.into()); // block_handler is a tagged pointer + asm.cmp(tag_opnd, 0x3.into()); // VM_BH_IFUNC_P + jit_chain_guard( + JCC_JNE, jit, - ctx, asm, - ocb, - comptime_iseq, - ci, - VM_FRAME_MAGIC_BLOCK, - None, - 0 as _, - None, - flags, - argc, - Some(captured_opnd), - ) - } else if comptime_handler.0 & 0x3 == 0x3 { // VM_BH_IFUNC_P - gen_counter_incr!(asm, invokeblock_ifunc); - CantCompile + SEND_MAX_DEPTH, + Counter::guard_invokeblock_tag_changed, + ); + + // The cfunc may not be leaf + jit_prepare_non_leaf_call(jit, asm); + + extern "C" { + fn rb_vm_yield_with_cfunc(ec: EcPtr, captured: *const rb_captured_block, argc: c_int, argv: *const VALUE) -> VALUE; + } + asm_comment!(asm, "call ifunc"); + let captured_opnd = asm.and(block_handler_opnd, Opnd::Imm(!0x3)); + let argv = asm.lea(asm.ctx.sp_opnd(-argc)); + let ret = asm.ccall( + rb_vm_yield_with_cfunc as *const u8, + vec![EC, captured_opnd, argc.into(), argv], + ); + + asm.stack_pop(argc.try_into().unwrap()); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, ret); + + // cfunc calls may corrupt types + asm.clear_local_types(); + + // Share the successor with other chains + jump_to_next_insn(jit, asm) } else if comptime_handler.symbol_p() { - gen_counter_incr!(asm, invokeblock_symbol); - CantCompile + gen_counter_incr(jit, asm, Counter::invokeblock_symbol); + None } else { // Proc - gen_counter_incr!(asm, invokeblock_proc); - CantCompile + gen_counter_incr(jit, asm, Counter::invokeblock_proc); 
+ None } } fn gen_invokesuper( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - let cd: *const rb_call_data = jit_get_arg(jit, 0).as_ptr(); - let block: Option<IseqPtr> = jit_get_arg(jit, 1).as_optional_ptr(); +) -> Option<CodegenStatus> { + // Generate specialized code if possible + let cd = jit.get_arg(0).as_ptr(); + if let Some(status) = gen_invokesuper_specialized(jit, asm, cd) { + return Some(status); + } + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of invokesuper + let blockiseq = jit.get_arg(1).as_iseq(); + gen_send_dynamic(jit, asm, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| { + extern "C" { + fn rb_vm_invokesuper(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; + } + asm.ccall( + rb_vm_invokesuper as *const u8, + vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()], + ) + }) +} + +fn gen_invokesuperforward( + jit: &mut JITState, + asm: &mut Assembler, +) -> Option<CodegenStatus> { + // Generate specialized code if possible + let cd = jit.get_arg(0).as_ptr(); + if let Some(status) = gen_invokesuper_specialized(jit, asm, cd) { + return Some(status); + } + + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of invokesuperforward + let blockiseq = jit.get_arg(1).as_iseq(); + gen_send_dynamic(jit, asm, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| { + extern "C" { + fn rb_vm_invokesuperforward(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; + } + asm.ccall( + rb_vm_invokesuperforward as *const u8, + vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()], + ) + }) +} + +fn gen_invokesuper_specialized( + jit: &mut JITState, + asm: &mut Assembler, + cd: *const rb_call_data, +) -> Option<CodegenStatus> { // Defer compilation so we can specialize on class of receiver - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, asm, ocb); - return EndBlock; + if !jit.at_compile_target() { + return jit.defer_compilation(asm); } - let me = unsafe { rb_vm_frame_method_entry(get_ec_cfp(jit.ec.unwrap())) }; + // Handle the last two branches of vm_caller_setup_arg_block + let block = if let Some(iseq) = jit.get_arg(1).as_optional_ptr() { + BlockHandler::BlockISeq(iseq) + } else { + BlockHandler::LEPSpecVal + }; + + // Fallback to dynamic dispatch if this callsite is megamorphic + if asm.ctx.get_chain_depth() >= SEND_MAX_DEPTH { + gen_counter_incr(jit, asm, Counter::invokesuper_megamorphic); + return None; + } + + let me = unsafe { rb_vm_frame_method_entry(jit.get_cfp()) }; if me.is_null() { - return CantCompile; + gen_counter_incr(jit, asm, Counter::invokesuper_no_me); + return None; } // FIXME: We should track and invalidate this block when this cme is invalidated let current_defined_class = unsafe { (*me).defined_class }; let mid = unsafe { get_def_original_id((*me).def) }; - if me != unsafe { rb_callable_method_entry(current_defined_class, (*me).called_id) } { - // Though we likely could generate this call, as we are only concerned - // with the method entry remaining valid, assume_method_lookup_stable - // below requires that the method lookup matches as well - return CantCompile; - } - // vm_search_normal_superclass let rbasic_ptr: *const RBasic = current_defined_class.as_ptr(); if current_defined_class.builtin_type() == RUBY_T_ICLASS && unsafe { RB_TYPE_P((*rbasic_ptr).klass, RUBY_T_MODULE) && FL_TEST_RAW((*rbasic_ptr).klass, VALUE(RMODULE_IS_REFINEMENT.as_usize())) != 
VALUE(0) } { - return CantCompile; + gen_counter_incr(jit, asm, Counter::invokesuper_refinement); + return None; } let comptime_superclass = unsafe { rb_class_get_superclass(RCLASS_ORIGIN(current_defined_class)) }; @@ -6375,16 +9856,16 @@ fn gen_invokesuper( // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block. if ci_flags & VM_CALL_KWARG != 0 { - gen_counter_incr!(asm, send_keywords); - return CantCompile; + gen_counter_incr(jit, asm, Counter::invokesuper_kwarg); + return None; } if ci_flags & VM_CALL_KW_SPLAT != 0 { - gen_counter_incr!(asm, send_kw_splat); - return CantCompile; + gen_counter_incr(jit, asm, Counter::invokesuper_kw_splat); + return None; } - if ci_flags & VM_CALL_ARGS_BLOCKARG != 0 { - gen_counter_incr!(asm, send_block_arg); - return CantCompile; + if ci_flags & VM_CALL_FORWARDING != 0 { + gen_counter_incr(jit, asm, Counter::invokesuper_forwarding); + return None; } // Ensure we haven't rebound this method onto an incompatible class. @@ -6392,267 +9873,259 @@ fn gen_invokesuper( // cheaper calculations first, but since we specialize on the method entry // and so only have to do this once at compile time this is fine to always // check and side exit. - let comptime_recv = jit_peek_at_stack(jit, ctx, argc as isize); + let comptime_recv = jit.peek_at_stack(&asm.ctx, argc as isize); if unsafe { rb_obj_is_kind_of(comptime_recv, current_defined_class) } == VALUE(0) { - return CantCompile; + gen_counter_incr(jit, asm, Counter::invokesuper_defined_class_mismatch); + return None; + } + + // Don't compile `super` on objects with singleton class to avoid retaining the receiver. + if VALUE(0) != unsafe { FL_TEST(comptime_recv.class_of(), VALUE(RUBY_FL_SINGLETON as usize)) } { + gen_counter_incr(jit, asm, Counter::invokesuper_singleton_class); + return None; } // Do method lookup let cme = unsafe { rb_callable_method_entry(comptime_superclass, mid) }; - if cme.is_null() { - return CantCompile; + gen_counter_incr(jit, asm, Counter::invokesuper_no_cme); + return None; } // Check that we'll be able to write this method dispatch before generating checks let cme_def_type = unsafe { get_cme_def_type(cme) }; if cme_def_type != VM_METHOD_TYPE_ISEQ && cme_def_type != VM_METHOD_TYPE_CFUNC { // others unimplemented - return CantCompile; + gen_counter_incr(jit, asm, Counter::invokesuper_not_iseq_or_cfunc); + return None; } - // Guard that the receiver has the same class as the one from compile time - let side_exit = get_side_exit(jit, ocb, ctx); - - let cfp = unsafe { get_ec_cfp(jit.ec.unwrap()) }; - let ep = unsafe { get_cfp_ep(cfp) }; - let cref_me = unsafe { *ep.offset(VM_ENV_DATA_INDEX_ME_CREF.try_into().unwrap()) }; - let me_as_value = VALUE(me as usize); - if cref_me != me_as_value { - // This will be the case for super within a block - return CantCompile; - } - - asm.comment("guard known me"); - let ep_opnd = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)); + asm_comment!(asm, "guard known me"); + let lep_opnd = gen_get_lep(jit, asm); let ep_me_opnd = Opnd::mem( 64, - ep_opnd, + lep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_ME_CREF, ); + + let me_as_value = VALUE(me as usize); asm.cmp(ep_me_opnd, me_as_value.into()); - asm.jne(counted_exit!(ocb, side_exit, invokesuper_me_changed)); - - if block.is_none() { - // Guard no block passed - // rb_vm_frame_block_handler(GET_EC()->cfp) == VM_BLOCK_HANDLER_NONE - // note, we assume VM_ASSERT(VM_ENV_LOCAL_P(ep)) - // - // TODO: this could properly forward the current block handler, but - // would require changes to gen_send_* - 
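// A simplified standalone model of how `super` resolves above: the lookup starts
// at the superclass of the class that *defines* the currently running method, not
// at the receiver's class. The flat table below is illustrative only; module
// origins, refinements, and singleton classes are deliberately ignored here.
use std::collections::HashMap;

struct Class {
    superclass: Option<usize>, // index of the parent in the table, None for the root
    methods: HashMap<&'static str, &'static str>,
}

fn super_lookup(classes: &[Class], defined_in: usize, mid: &str) -> Option<&'static str> {
    // Start one level above the defining class and walk up the chain.
    let mut current = classes[defined_in].superclass;
    while let Some(idx) = current {
        if let Some(&body) = classes[idx].methods.get(mid) {
            return Some(body);
        }
        current = classes[idx].superclass;
    }
    None
}

fn main() {
    // Index 0: A (root, defines #greet); index 1: B < A (also defines #greet).
    let classes = vec![
        Class { superclass: None,    methods: HashMap::from([("greet", "A#greet")]) },
        Class { superclass: Some(0), methods: HashMap::from([("greet", "B#greet")]) },
    ];
    // `super` inside B#greet starts the search at B's superclass and finds A#greet,
    // regardless of how specialized the receiver's own class is.
    assert_eq!(super_lookup(&classes, 1, "greet"), Some("A#greet"));
}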
asm.comment("guard no block given"); - // EP is in REG0 from above - let ep_opnd = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)); - let ep_specval_opnd = Opnd::mem( - 64, - ep_opnd, - SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL, - ); - asm.cmp(ep_specval_opnd, VM_BLOCK_HANDLER_NONE.into()); - asm.jne(counted_exit!(ocb, side_exit, invokesuper_block)); - } + jit_chain_guard( + JCC_JNE, + jit, + asm, + SEND_MAX_DEPTH, + Counter::guard_invokesuper_me_changed, + ); // We need to assume that both our current method entry and the super // method entry we invoke remain stable - assume_method_lookup_stable(jit, ocb, me); - assume_method_lookup_stable(jit, ocb, cme); + jit.assume_method_lookup_stable(asm, me); + jit.assume_method_lookup_stable(asm, cme); // Method calls may corrupt types - ctx.clear_local_types(); + asm.clear_local_types(); match cme_def_type { VM_METHOD_TYPE_ISEQ => { let iseq = unsafe { get_def_iseq_ptr((*cme).def) }; let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL; - gen_send_iseq(jit, ctx, asm, ocb, iseq, ci, frame_type, None, cme, block, ci_flags, argc, None) + perf_call! { gen_send_iseq(jit, asm, iseq, ci, frame_type, None, cme, Some(block), ci_flags, argc, None) } } VM_METHOD_TYPE_CFUNC => { - gen_send_cfunc(jit, ctx, asm, ocb, ci, cme, block, ptr::null(), ci_flags, argc) + perf_call! { gen_send_cfunc(jit, asm, ci, cme, Some(block), None, ci_flags, argc) } } _ => unreachable!(), } } fn gen_leave( - jit: &mut JITState, - ctx: &mut Context, + _jit: &mut JITState, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Only the return value should be on the stack - assert_eq!(1, ctx.get_stack_size()); - - // Create a side-exit to fall back to the interpreter - let side_exit = get_side_exit(jit, ocb, ctx); - let ocb_asm = Assembler::new(); + assert_eq!(1, asm.ctx.get_stack_size(), "leave instruction expects stack size 1, but was: {}", asm.ctx.get_stack_size()); // Check for interrupts - gen_check_ints(asm, counted_exit!(ocb, side_exit, leave_se_interrupt)); - ocb_asm.compile(ocb.unwrap()); + gen_check_ints(asm, Counter::leave_se_interrupt); // Pop the current frame (ec->cfp++) // Note: the return PC is already in the previous CFP - asm.comment("pop stack frame"); + asm_comment!(asm, "pop stack frame"); let incr_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); asm.mov(CFP, incr_cfp); - asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); + asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP); // Load the return value - let retval_opnd = ctx.stack_pop(1); + let retval_opnd = asm.stack_pop(1); - // Move the return value into the C return register for gen_leave_exit() + // Move the return value into the C return register asm.mov(C_RET_OPND, retval_opnd); - // Reload REG_SP for the caller and write the return value. - // Top of the stack is REG_SP[0] since the caller has sp_offset=1. - asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); - asm.mov(Opnd::mem(64, SP, 0), C_RET_OPND); - - // Jump to the JIT return address on the frame that was just popped + // Jump to the JIT return address on the frame that was just popped. + // There are a few possible jump targets: + // - gen_leave_exit() and gen_leave_exception(), for C callers + // - Return context set up by gen_send_iseq() + // We don't write the return value to stack memory like the interpreter here. + // Each jump target do it as necessary. 
let offset_to_jit_return = -(RUBY_SIZEOF_CONTROL_FRAME as i32) + RUBY_OFFSET_CFP_JIT_RETURN; asm.jmp_opnd(Opnd::mem(64, CFP, offset_to_jit_return)); - EndBlock + Some(EndBlock) } fn gen_getglobal( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let gid = jit_get_arg(jit, 0).as_usize(); +) -> Option<CodegenStatus> { + let gid = jit.get_arg(0).as_usize(); // Save the PC and SP because we might make a Ruby call for warning - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_non_leaf_call(jit, asm); let val_opnd = asm.ccall( rb_gvar_get as *const u8, vec![ gid.into() ] ); - let top = ctx.stack_push(Type::Unknown); + let top = asm.stack_push(Type::Unknown); asm.mov(top, val_opnd); - KeepCompiling + Some(KeepCompiling) } fn gen_setglobal( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let gid = jit_get_arg(jit, 0).as_usize(); +) -> Option<CodegenStatus> { + let gid = jit.get_arg(0).as_usize(); // Save the PC and SP because we might make a Ruby call for // Kernel#set_trace_var - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_non_leaf_call(jit, asm); + let val = asm.stack_opnd(0); asm.ccall( rb_gvar_set as *const u8, vec![ gid.into(), - ctx.stack_pop(1), + val, ], ); + asm.stack_pop(1); // Keep it during ccall for GC - KeepCompiling + Some(KeepCompiling) } fn gen_anytostring( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Save the PC and SP since we might call #to_s - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_non_leaf_call(jit, asm); - let str = ctx.stack_pop(1); - let val = ctx.stack_pop(1); + let str = asm.stack_opnd(0); + let val = asm.stack_opnd(1); let val = asm.ccall(rb_obj_as_string_result as *const u8, vec![str, val]); + asm.stack_pop(2); // Keep them during ccall for GC // Push the return value - let stack_ret = ctx.stack_push(Type::TString); + let stack_ret = asm.stack_push(Type::TString); asm.mov(stack_ret, val); - KeepCompiling + Some(KeepCompiling) } fn gen_objtostring( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, asm, ocb); - return EndBlock; +) -> Option<CodegenStatus> { + if !jit.at_compile_target() { + return jit.defer_compilation(asm); } - let recv = ctx.stack_opnd(0); - let comptime_recv = jit_peek_at_stack(jit, ctx, 0); + let recv = asm.stack_opnd(0); + let comptime_recv = jit.peek_at_stack(&asm.ctx, 0); if unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRING) } { - let side_exit = get_side_exit(jit, ocb, ctx); - jit_guard_known_klass( jit, - ctx, asm, - ocb, - comptime_recv.class_of(), recv, - StackOpnd(0), + recv.into(), comptime_recv, SEND_MAX_DEPTH, - side_exit, + Counter::objtostring_not_string, ); + // No work needed. The string value is already on the top of the stack. 
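// A standalone model of the "pop after the ccall" pattern used in gen_setglobal and
// gen_anytostring above: values still on the VM temp stack are GC roots, so operands
// are popped only after the C call that might allocate. The GC here is faked with a
// log that records which values it could still see; everything below is illustrative.
struct ToyVm {
    stack: Vec<u64>, // the temp stack doubles as the root set in this model
}

impl ToyVm {
    // A "C call" that may trigger GC before using its argument.
    fn ccall_that_allocates(&mut self, arg: u64, gc_log: &mut Vec<Vec<u64>>) -> u64 {
        // GC runs: it can only see values that are still on the stack.
        gc_log.push(self.stack.clone());
        arg.wrapping_mul(2)
    }

    fn set_global_like(&mut self, gc_log: &mut Vec<Vec<u64>>) -> u64 {
        // Read the operand without popping it (like asm.stack_opnd(0)).
        let val = *self.stack.last().expect("operand on stack");
        let ret = self.ccall_that_allocates(val, gc_log);
        // Only now is it safe to drop the operand (like asm.stack_pop(1)).
        self.stack.pop();
        ret
    }
}

fn main() {
    let mut vm = ToyVm { stack: vec![0x1234] };
    let mut gc_log = Vec::new();
    vm.set_global_like(&mut gc_log);
    // The operand was visible to the GC that ran during the call.
    assert_eq!(gc_log[0], vec![0x1234]);
    assert!(vm.stack.is_empty());
}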
- KeepCompiling + Some(KeepCompiling) + } else if unsafe { RB_TYPE_P(comptime_recv, RUBY_T_SYMBOL) } && assume_method_basic_definition(jit, asm, comptime_recv.class_of(), ID!(to_s)) { + jit_guard_known_klass( + jit, + asm, + recv, + recv.into(), + comptime_recv, + SEND_MAX_DEPTH, + Counter::objtostring_not_string, + ); + + extern "C" { + fn rb_sym2str(sym: VALUE) -> VALUE; + } + + // Same optimization done in the interpreter: rb_sym_to_s() allocates a mutable string, but since we are only + // going to use this string for interpolation, it's fine to use the + // frozen string. + // rb_sym2str does not allocate. + let sym = recv; + let str = asm.ccall(rb_sym2str as *const u8, vec![sym]); + asm.stack_pop(1); + + // Push the return value + let stack_ret = asm.stack_push(Type::TString); + asm.mov(stack_ret, str); + + Some(KeepCompiling) } else { - let cd = jit_get_arg(jit, 0).as_ptr(); - gen_send_general(jit, ctx, asm, ocb, cd, None) + let cd = jit.get_arg(0).as_ptr(); + perf_call! { gen_send_general(jit, asm, cd, None) } } } fn gen_intern( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // Save the PC and SP because we might allocate - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_call_with_gc(jit, asm); - let str = ctx.stack_pop(1); + let str = asm.stack_opnd(0); let sym = asm.ccall(rb_str_intern as *const u8, vec![str]); + asm.stack_pop(1); // Keep it during ccall for GC // Push the return value - let stack_ret = ctx.stack_push(Type::Unknown); + let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, sym); - KeepCompiling + Some(KeepCompiling) } fn gen_toregexp( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let opt = jit_get_arg(jit, 0).as_i64(); - let cnt = jit_get_arg(jit, 1).as_usize(); +) -> Option<CodegenStatus> { + let opt = jit.get_arg(0).as_i64(); + let cnt = jit.get_arg(1).as_usize(); // Save the PC and SP because this allocates an object and could // raise an exception. - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_non_leaf_call(jit, asm); - let values_ptr = asm.lea(ctx.sp_opnd(-((SIZEOF_VALUE as isize) * (cnt as isize)))); - ctx.stack_pop(cnt); + let values_ptr = asm.lea(asm.ctx.sp_opnd(-(cnt as i32))); let ary = asm.ccall( rb_ary_tmp_new_from_values as *const u8, @@ -6662,6 +10135,7 @@ fn gen_toregexp( values_ptr, ] ); + asm.stack_pop(cnt); // Let ccall spill them // Save the array so we can clear it later asm.cpush(ary); @@ -6681,77 +10155,75 @@ fn gen_toregexp( asm.cpop_into(ary); // The value we want to push on the stack is in RAX right now - let stack_ret = ctx.stack_push(Type::Unknown); + let stack_ret = asm.stack_push(Type::UnknownHeap); asm.mov(stack_ret, val); // Clear the temp array. 
asm.ccall(rb_ary_clear as *const u8, vec![ary]); - KeepCompiling + Some(KeepCompiling) } fn gen_getspecial( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // This takes two arguments, key and type // key is only used when type == 0 // A non-zero type determines which type of backref to fetch - //rb_num_t key = jit_get_arg(jit, 0); - let rtype = jit_get_arg(jit, 1).as_u64(); + //rb_num_t key = jit.jit_get_arg(0); + let rtype = jit.get_arg(1).as_u64(); if rtype == 0 { // not yet implemented - return CantCompile; + return None; } else if rtype & 0x01 != 0 { // Fetch a "special" backref based on a char encoded by shifting by 1 // Can raise if matchdata uninitialized - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_non_leaf_call(jit, asm); // call rb_backref_get() - asm.comment("rb_backref_get"); + asm_comment!(asm, "rb_backref_get"); let backref = asm.ccall(rb_backref_get as *const u8, vec![]); let rt_u8: u8 = (rtype >> 1).try_into().unwrap(); let val = match rt_u8.into() { '&' => { - asm.comment("rb_reg_last_match"); + asm_comment!(asm, "rb_reg_last_match"); asm.ccall(rb_reg_last_match as *const u8, vec![backref]) } '`' => { - asm.comment("rb_reg_match_pre"); + asm_comment!(asm, "rb_reg_match_pre"); asm.ccall(rb_reg_match_pre as *const u8, vec![backref]) } '\'' => { - asm.comment("rb_reg_match_post"); + asm_comment!(asm, "rb_reg_match_post"); asm.ccall(rb_reg_match_post as *const u8, vec![backref]) } '+' => { - asm.comment("rb_reg_match_last"); + asm_comment!(asm, "rb_reg_match_last"); asm.ccall(rb_reg_match_last as *const u8, vec![backref]) } _ => panic!("invalid back-ref"), }; - let stack_ret = ctx.stack_push(Type::Unknown); + let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, val); - KeepCompiling + Some(KeepCompiling) } else { // Fetch the N-th match from the last backref based on type shifted by 1 // Can raise if matchdata uninitialized - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_non_leaf_call(jit, asm); // call rb_backref_get() - asm.comment("rb_backref_get"); + asm_comment!(asm, "rb_backref_get"); let backref = asm.ccall(rb_backref_get as *const u8, vec![]); // rb_reg_nth_match((int)(type >> 1), backref); - asm.comment("rb_reg_nth_match"); + asm_comment!(asm, "rb_reg_nth_match"); let val = asm.ccall( rb_reg_nth_match as *const u8, vec![ @@ -6760,75 +10232,71 @@ fn gen_getspecial( ] ); - let stack_ret = ctx.stack_push(Type::Unknown); + let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, val); - KeepCompiling + Some(KeepCompiling) } } fn gen_getclassvariable( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // rb_vm_getclassvariable can raise exceptions. 
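// A standalone decoder for the `getspecial` type operand handled above: bit 0
// distinguishes the named back-references ($&, $`, $', $+), whose character is
// stored shifted left by one, from the numbered ones ($1, $2, ...), whose index
// is also stored shifted left by one. The encoding shown matches the checks in
// the generated code above; the enum itself is just for illustration.
#[derive(Debug, PartialEq)]
enum Backref {
    Special(char), // $&, $`, $', $+
    Nth(u32),      // $1, $2, ...
}

fn decode_getspecial_type(rtype: u64) -> Option<Backref> {
    if rtype == 0 {
        None // key-based lookup; not modelled here (and not compiled above)
    } else if rtype & 0x01 != 0 {
        Some(Backref::Special((rtype >> 1) as u8 as char))
    } else {
        Some(Backref::Nth((rtype >> 1) as u32))
    }
}

fn main() {
    assert_eq!(decode_getspecial_type((('&' as u64) << 1) | 1), Some(Backref::Special('&')));
    assert_eq!(decode_getspecial_type(3 << 1), Some(Backref::Nth(3)));
    assert_eq!(decode_getspecial_type(0), None);
}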
- jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_non_leaf_call(jit, asm); let val_opnd = asm.ccall( rb_vm_getclassvariable as *const u8, vec![ - Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ), + VALUE(jit.iseq as usize).into(), CFP, - Opnd::UImm(jit_get_arg(jit, 0).as_u64()), - Opnd::UImm(jit_get_arg(jit, 1).as_u64()), + Opnd::UImm(jit.get_arg(0).as_u64()), + Opnd::UImm(jit.get_arg(1).as_u64()), ], ); - let top = ctx.stack_push(Type::Unknown); + let top = asm.stack_push(Type::Unknown); asm.mov(top, val_opnd); - KeepCompiling + Some(KeepCompiling) } fn gen_setclassvariable( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // rb_vm_setclassvariable can raise exceptions. - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_non_leaf_call(jit, asm); + let val = asm.stack_opnd(0); asm.ccall( rb_vm_setclassvariable as *const u8, vec![ - Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ), + VALUE(jit.iseq as usize).into(), CFP, - Opnd::UImm(jit_get_arg(jit, 0).as_u64()), - ctx.stack_pop(1), - Opnd::UImm(jit_get_arg(jit, 1).as_u64()), + Opnd::UImm(jit.get_arg(0).as_u64()), + val, + Opnd::UImm(jit.get_arg(1).as_u64()), ], ); + asm.stack_pop(1); // Keep it during ccall for GC - KeepCompiling + Some(KeepCompiling) } fn gen_getconstant( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { - let id = jit_get_arg(jit, 0).as_usize(); + let id = jit.get_arg(0).as_usize(); // vm_get_ev_const can raise exceptions. - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_non_leaf_call(jit, asm); - let allow_nil_opnd = ctx.stack_pop(1); - let klass_opnd = ctx.stack_pop(1); + let allow_nil_opnd = asm.stack_opnd(0); + let klass_opnd = asm.stack_opnd(1); extern "C" { fn rb_vm_get_ev_const(ec: EcPtr, klass: VALUE, id: ID, allow_nil: VALUE) -> VALUE; @@ -6843,42 +10311,58 @@ fn gen_getconstant( allow_nil_opnd ], ); + asm.stack_pop(2); // Keep them during ccall for GC - let top = ctx.stack_push(Type::Unknown); + let top = asm.stack_push(Type::Unknown); asm.mov(top, val_opnd); - KeepCompiling + Some(KeepCompiling) } fn gen_opt_getconstant_path( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - let const_cache_as_value = jit_get_arg(jit, 0); +) -> Option<CodegenStatus> { + let const_cache_as_value = jit.get_arg(0); let ic: *const iseq_inline_constant_cache = const_cache_as_value.as_ptr(); let idlist: *const ID = unsafe { (*ic).segments }; + // Make sure there is an exit for this block as the interpreter might want + // to invalidate this block from yjit_constant_ic_update(). + jit_ensure_block_entry_exit(jit, asm)?; + // See vm_ic_hit_p(). The same conditions are checked in yjit_constant_ic_update(). + // If a cache is not filled, fallback to the general C call. let ice = unsafe { (*ic).entry }; if ice.is_null() { - // In this case, leave a block that unconditionally side exits - // for the interpreter to invalidate. - return CantCompile; + // Prepare for const_missing + jit_prepare_non_leaf_call(jit, asm); + + // If this does not trigger const_missing, vm_ic_update will invalidate this block. 
+ extern "C" { + fn rb_vm_opt_getconstant_path(ec: EcPtr, cfp: CfpPtr, ic: *const u8) -> VALUE; + } + let val = asm.ccall( + rb_vm_opt_getconstant_path as *const u8, + vec![EC, CFP, Opnd::const_ptr(ic as *const u8)], + ); + + let stack_top = asm.stack_push(Type::Unknown); + asm.store(stack_top, val); + + return jump_to_next_insn(jit, asm); } - // Make sure there is an exit for this block as the interpreter might want - // to invalidate this block from yjit_constant_ic_update(). - jit_ensure_block_entry_exit(jit, ocb); + let cref_sensitive = !unsafe { (*ice).ic_cref }.is_null(); + let is_shareable = unsafe { rb_yjit_constcache_shareable(ice) }; + let needs_checks = cref_sensitive || (!is_shareable && !assume_single_ractor_mode(jit, asm)); - if !unsafe { (*ice).ic_cref }.is_null() { + if needs_checks { // Cache is keyed on a certain lexical scope. Use the interpreter's cache. - let side_exit = get_side_exit(jit, ocb, ctx); - let inline_cache = asm.load(Opnd::const_ptr(ic as *const u8)); // Call function to verify the cache. It doesn't allocate or call methods. + // This includes a check for Ractor safety let ret_val = asm.ccall( rb_vm_ic_hit_p as *const u8, vec![inline_cache, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)] @@ -6887,7 +10371,7 @@ fn gen_opt_getconstant_path( // Check the result. SysV only specifies one byte for _Bool return values, // so it's important we only check one bit to ignore the higher bits in the register. asm.test(ret_val, 1.into()); - asm.jz(counted_exit!(ocb, side_exit, opt_getinlinecache_miss)); + asm.jz(Target::side_exit(Counter::opt_getconstant_path_ic_miss)); let inline_cache = asm.load(Opnd::const_ptr(ic as *const u8)); @@ -6904,24 +10388,17 @@ fn gen_opt_getconstant_path( )); // Push ic->entry->value - let stack_top = ctx.stack_push(Type::Unknown); + let stack_top = asm.stack_push(Type::Unknown); asm.store(stack_top, ic_entry_val); } else { - // Optimize for single ractor mode. - // FIXME: This leaks when st_insert raises NoMemoryError - if !assume_single_ractor_mode(jit, ocb) { - return CantCompile; - } - // Invalidate output code on any constant writes associated with // constants referenced within the current block. - assume_stable_constant_names(jit, ocb, idlist); + jit.assume_stable_constant_names(asm, idlist); - jit_putobject(jit, ctx, asm, unsafe { (*ice).value }); + jit_putobject(asm, unsafe { (*ice).value }); } - jump_to_next_insn(jit, ctx, asm, ocb); - EndBlock + jump_to_next_insn(jit, asm) } // Push the explicit block parameter onto the temporary stack. Part of the @@ -6929,31 +10406,27 @@ fn gen_opt_getconstant_path( // explicit block parameters. fn gen_getblockparamproxy( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { - if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, asm, ocb); - return EndBlock; +) -> Option<CodegenStatus> { + if !jit.at_compile_target() { + return jit.defer_compilation(asm); } - let starting_context = ctx.clone(); // make a copy for use with jit_chain_guard - - // A mirror of the interpreter code. Checking for the case - // where it's pushing rb_block_param_proxy. 
- let side_exit = get_side_exit(jit, ocb, ctx); - // EP level - let level = jit_get_arg(jit, 1).as_u32(); + let level = jit.get_arg(1).as_u32(); // Peek at the block handler so we can check whether it's nil - let comptime_handler = jit_peek_at_block_handler(jit, level); + let comptime_handler = jit.peek_at_block_handler(level); - // When a block handler is present, it should always be a GC-guarded - // pointer (VM_BH_ISEQ_BLOCK_P) - if comptime_handler.as_u64() != 0 && comptime_handler.as_u64() & 0x3 != 0x1 { - return CantCompile; + // Filter for the 4 cases we currently handle + if !(comptime_handler.as_u64() == 0 || // no block given + comptime_handler.as_u64() & 0x3 == 0x1 || // iseq block (no associated GC managed object) + comptime_handler.as_u64() & 0x3 == 0x3 || // ifunc block (no associated GC managed object) + unsafe { rb_obj_is_proc(comptime_handler) }.test() // block is a Proc + ) { + // Missing the symbol case, where we basically need to call Symbol#to_proc at runtime + gen_counter_incr(jit, asm, Counter::gbpp_unsupported_type); + return None; } // Load environment pointer EP from CFP @@ -6966,7 +10439,7 @@ fn gen_getblockparamproxy( SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32), ); asm.test(flag_check, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); - asm.jnz(counted_exit!(ocb, side_exit, gbpp_block_param_modified)); + asm.jnz(Target::side_exit(Counter::gbpp_block_param_modified)); // Load the block handler for the current frame // note, VM_ASSERT(VM_ENV_LOCAL_P(ep)) @@ -6974,7 +10447,12 @@ fn gen_getblockparamproxy( Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) ); - // Specialize compilation for the case where no block handler is present + // Use block handler sample to guide specialization... + // NOTE: we use jit_chain_guard() in this decision tree, and since + // there are only a few cases, it should never reach the depth limit use + // the exit counter we pass to it. + // + // No block given if comptime_handler.as_u64() == 0 { // Bail if there is a block handler asm.cmp(block_handler, Opnd::UImm(0)); @@ -6982,58 +10460,85 @@ fn gen_getblockparamproxy( jit_chain_guard( JCC_JNZ, jit, - &starting_context, asm, - ocb, SEND_MAX_DEPTH, - side_exit, + Counter::gbpp_block_handler_not_none, ); - jit_putobject(jit, ctx, asm, Qnil); - } else { - // Block handler is a tagged pointer. Look at the tag. 0x03 is from VM_BH_ISEQ_BLOCK_P(). - let block_handler = asm.and(block_handler, 0x3.into()); - - // Bail unless VM_BH_ISEQ_BLOCK_P(bh). This also checks for null. - asm.cmp(block_handler, 0x1.into()); - + jit_putobject(asm, Qnil); + } else if comptime_handler.as_u64() & 0x1 == 0x1 { + // This handles two cases which are nearly identical + // Block handler is a tagged pointer. Look at the tag. + // VM_BH_ISEQ_BLOCK_P(): block_handler & 0x03 == 0x01 + // VM_BH_IFUNC_P(): block_handler & 0x03 == 0x03 + // So to check for either of those cases we can use: val & 0x1 == 0x1 + const _: () = assert!(RUBY_SYMBOL_FLAG & 1 == 0, "guard below rejects symbol block handlers"); + // Procs are aligned heap pointers so testing the bit rejects them too. + + asm.test(block_handler, 0x1.into()); jit_chain_guard( - JCC_JNZ, + JCC_JZ, jit, - &starting_context, asm, - ocb, SEND_MAX_DEPTH, - side_exit, + Counter::gbpp_block_handler_not_iseq, ); // Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr. 
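// A standalone model of the block-handler tagging scheme relied on above. The
// numeric layout is illustrative: Proc objects are modelled as aligned "heap
// pointers" with clear low bits, and static symbols are assumed to have bit 0
// clear (which is what the const assert above checks), so the low two bits
// classify the handler the same way the guards above do.
#[derive(Debug, PartialEq)]
enum BlockHandlerKind {
    None,       // VM_BLOCK_HANDLER_NONE (0): no block given
    IseqBlock,  // tagged pointer, low bits 0b01
    IfuncBlock, // tagged pointer, low bits 0b11
    ProcOrSym,  // untagged object reference (aligned pointer or symbol)
}

fn classify(handler: usize) -> BlockHandlerKind {
    if handler == 0 {
        BlockHandlerKind::None
    } else if handler & 0x3 == 0x1 {
        BlockHandlerKind::IseqBlock
    } else if handler & 0x3 == 0x3 {
        BlockHandlerKind::IfuncBlock
    } else {
        // Low bit clear: either a Proc object or a Symbol; the generated code
        // needs a further runtime check (e.g. rb_obj_is_proc) to tell them apart.
        BlockHandlerKind::ProcOrSym
    }
}

fn main() {
    let captured_block = 0x1000usize; // some aligned captured-block address
    assert_eq!(classify(0), BlockHandlerKind::None);
    assert_eq!(classify(captured_block | 0x1), BlockHandlerKind::IseqBlock);
    assert_eq!(classify(captured_block | 0x3), BlockHandlerKind::IfuncBlock);
    assert_eq!(classify(captured_block), BlockHandlerKind::ProcOrSym);
    // A single-bit test (handler & 1 == 1) accepts both tagged kinds at once,
    // which is what the getblockparamproxy guard above does.
}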
assert!(!unsafe { rb_block_param_proxy }.special_const_p()); - let top = ctx.stack_push(Type::BlockParamProxy); + let top = asm.stack_push(Type::BlockParamProxy); asm.mov(top, Opnd::const_ptr(unsafe { rb_block_param_proxy }.as_ptr())); - } + } else if unsafe { rb_obj_is_proc(comptime_handler) }.test() { + // The block parameter is a Proc + c_callable! { + // We can't hold values across C calls due to a backend limitation, + // so we'll use this thin wrapper around rb_obj_is_proc(). + fn is_proc(object: VALUE) -> VALUE { + if unsafe { rb_obj_is_proc(object) }.test() { + // VM_BH_TO_PROC() is the identify function. + object + } else { + Qfalse + } + } + } + + // Simple predicate, no need to jit_prepare_non_leaf_call() + let proc_or_false = asm.ccall(is_proc as _, vec![block_handler]); - jump_to_next_insn(jit, ctx, asm, ocb); + // Guard for proc + asm.cmp(proc_or_false, Qfalse.into()); + jit_chain_guard( + JCC_JE, + jit, + asm, + SEND_MAX_DEPTH, + Counter::gbpp_block_handler_not_proc, + ); - EndBlock + let top = asm.stack_push(Type::Unknown); + asm.mov(top, proc_or_false); + } else { + unreachable!("absurd given initial filtering"); + } + + jump_to_next_insn(jit, asm) } fn gen_getblockparam( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, -) -> CodegenStatus { +) -> Option<CodegenStatus> { // EP level - let level = jit_get_arg(jit, 1).as_u32(); + let level = jit.get_arg(1).as_u32(); // Save the PC and SP because we might allocate - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_call_with_gc(jit, asm); + asm.spill_regs(); // For ccall. Unconditionally spill them for RegMappings consistency. // A mirror of the interpreter code. Checking for the case // where it's pushing rb_block_param_proxy. - let side_exit = get_side_exit(jit, ocb, ctx); // Load environment pointer EP from CFP let ep_opnd = gen_get_ep(asm, level); @@ -7061,7 +10566,7 @@ fn gen_getblockparam( asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into()); // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0 - asm.jnz(side_exit); + asm.jnz(Target::side_exit(Counter::gbp_wb_required)); // Convert the block handler in to a proc // call rb_vm_bh_to_procval(const rb_execution_context_t *ec, VALUE block_handler) @@ -7083,7 +10588,7 @@ fn gen_getblockparam( let ep_opnd = gen_get_ep(asm, level); // Write the value at the environment pointer - let idx = jit_get_arg(jit, 0).as_i32(); + let idx = jit.get_arg(0).as_i32(); let offs = -(SIZEOF_VALUE_I32 * idx); asm.mov(Opnd::mem(64, ep_opnd, offs), proc); @@ -7095,47 +10600,46 @@ fn gen_getblockparam( asm.write_label(frame_flag_modified); // Push the proc on the stack - let stack_ret = ctx.stack_push(Type::Unknown); + let stack_ret = asm.stack_push(Type::Unknown); let ep_opnd = gen_get_ep(asm, level); asm.mov(stack_ret, Opnd::mem(64, ep_opnd, offs)); - KeepCompiling + Some(KeepCompiling) } fn gen_invokebuiltin( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let bf: *const rb_builtin_function = jit_get_arg(jit, 0).as_ptr(); +) -> Option<CodegenStatus> { + let bf: *const rb_builtin_function = jit.get_arg(0).as_ptr(); let bf_argc: usize = unsafe { (*bf).argc }.try_into().expect("non negative argc"); // ec, self, and arguments if bf_argc + 2 > C_ARG_OPNDS.len() { - return CantCompile; + incr_counter!(invokebuiltin_too_many_args); + return None; } // If the calls don't allocate, do they need up to date PC, SP? 
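// A small standalone check mirroring the invokebuiltin guard above: a builtin is
// called as (ec, recv, arg1, ..., argN), so it only fits when N + 2 arguments fit
// in the C argument registers. The register count below is an assumption for
// illustration (System V x86-64 passes 6 integer arguments in registers); the real
// code asks C_ARG_OPNDS.len() for the current platform.
const C_ARG_REGS: usize = 6; // stand-in for C_ARG_OPNDS.len(); platform-dependent

fn builtin_fits_in_registers(bf_argc: usize) -> bool {
    // +2 accounts for the implicit `ec` and `self` arguments.
    bf_argc + 2 <= C_ARG_REGS
}

fn main() {
    assert!(builtin_fits_in_registers(4));  // ec + self + 4 args = 6 registers
    assert!(!builtin_fits_in_registers(5)); // would need a stack argument: bail out
}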
- jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_non_leaf_call(jit, asm); // Call the builtin func (ec, recv, arg1, arg2, ...) let mut args = vec![EC, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)]; // Copy arguments from locals for i in 0..bf_argc { - let stack_opnd = ctx.stack_opnd((bf_argc - i - 1) as i32); + let stack_opnd = asm.stack_opnd((bf_argc - i - 1) as i32); args.push(stack_opnd); } let val = asm.ccall(unsafe { (*bf).func_ptr } as *const u8, args); // Push the return value - ctx.stack_pop(bf_argc); - let stack_ret = ctx.stack_push(Type::Unknown); + asm.stack_pop(bf_argc); + let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, val); - KeepCompiling + Some(KeepCompiling) } // opt_invokebuiltin_delegate calls a builtin function, like @@ -7143,21 +10647,20 @@ fn gen_invokebuiltin( // stack uses the argument locals (and self) from the current method. fn gen_opt_invokebuiltin_delegate( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let bf: *const rb_builtin_function = jit_get_arg(jit, 0).as_ptr(); +) -> Option<CodegenStatus> { + let bf: *const rb_builtin_function = jit.get_arg(0).as_ptr(); let bf_argc = unsafe { (*bf).argc }; - let start_index = jit_get_arg(jit, 1).as_i32(); + let start_index = jit.get_arg(1).as_i32(); // ec, self, and arguments if bf_argc + 2 > (C_ARG_OPNDS.len() as i32) { - return CantCompile; + incr_counter!(invokebuiltin_too_many_args); + return None; } // If the calls don't allocate, do they need up to date PC, SP? - jit_prepare_routine_call(jit, ctx, asm); + jit_prepare_non_leaf_call(jit, asm); // Call the builtin func (ec, recv, arg1, arg2, ...) let mut args = vec![EC, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)]; @@ -7177,10 +10680,10 @@ fn gen_opt_invokebuiltin_delegate( let val = asm.ccall(unsafe { (*bf).func_ptr } as *const u8, args); // Push the return value - let stack_ret = ctx.stack_push(Type::Unknown); + let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, val); - KeepCompiling + Some(KeepCompiling) } /// Maps a YARV opcode to a code generation function (if supported) @@ -7195,6 +10698,7 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> { YARVINSN_dup => Some(gen_dup), YARVINSN_dupn => Some(gen_dupn), YARVINSN_swap => Some(gen_swap), + YARVINSN_opt_reverse => Some(gen_opt_reverse), YARVINSN_putnil => Some(gen_putnil), YARVINSN_putobject => Some(gen_putobject), YARVINSN_putobject_INT2FIX_0_ => Some(gen_putobject_int2fix), @@ -7225,16 +10729,24 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> { YARVINSN_opt_gt => Some(gen_opt_gt), YARVINSN_opt_ge => Some(gen_opt_ge), YARVINSN_opt_mod => Some(gen_opt_mod), + YARVINSN_opt_ary_freeze => Some(gen_opt_ary_freeze), + YARVINSN_opt_hash_freeze => Some(gen_opt_hash_freeze), YARVINSN_opt_str_freeze => Some(gen_opt_str_freeze), YARVINSN_opt_str_uminus => Some(gen_opt_str_uminus), - YARVINSN_opt_newarray_max => Some(gen_opt_newarray_max), - YARVINSN_opt_newarray_min => Some(gen_opt_newarray_min), + YARVINSN_opt_duparray_send => Some(gen_opt_duparray_send), + YARVINSN_opt_newarray_send => Some(gen_opt_newarray_send), YARVINSN_splatarray => Some(gen_splatarray), + YARVINSN_splatkw => Some(gen_splatkw), YARVINSN_concatarray => Some(gen_concatarray), + YARVINSN_concattoarray => Some(gen_concattoarray), + YARVINSN_pushtoarray => Some(gen_pushtoarray), YARVINSN_newrange => Some(gen_newrange), YARVINSN_putstring => Some(gen_putstring), + YARVINSN_putchilledstring => Some(gen_putchilledstring), YARVINSN_expandarray => 
Some(gen_expandarray), YARVINSN_defined => Some(gen_defined), + YARVINSN_definedivar => Some(gen_definedivar), + YARVINSN_checkmatch => Some(gen_checkmatch), YARVINSN_checkkeyword => Some(gen_checkkeyword), YARVINSN_concatstrings => Some(gen_concatstrings), YARVINSN_getinstancevariable => Some(gen_getinstancevariable), @@ -7263,14 +10775,18 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> { YARVINSN_branchif => Some(gen_branchif), YARVINSN_branchunless => Some(gen_branchunless), YARVINSN_branchnil => Some(gen_branchnil), + YARVINSN_throw => Some(gen_throw), YARVINSN_jump => Some(gen_jump), + YARVINSN_opt_new => Some(gen_opt_new), YARVINSN_getblockparamproxy => Some(gen_getblockparamproxy), YARVINSN_getblockparam => Some(gen_getblockparam), YARVINSN_opt_send_without_block => Some(gen_opt_send_without_block), YARVINSN_send => Some(gen_send), + YARVINSN_sendforward => Some(gen_sendforward), YARVINSN_invokeblock => Some(gen_invokeblock), YARVINSN_invokesuper => Some(gen_invokesuper), + YARVINSN_invokesuperforward => Some(gen_invokesuperforward), YARVINSN_leave => Some(gen_leave), YARVINSN_getglobal => Some(gen_getglobal), @@ -7288,23 +10804,134 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> { } } -// Return true when the codegen function generates code. -// known_recv_klass is non-NULL when the caller has used jit_guard_known_klass(). -// See yjit_reg_method(). +/// Return true when the codegen function generates code. +/// known_recv_class has Some value when the caller has used jit_guard_known_klass(). +/// See [reg_method_codegen] type MethodGenFn = fn( jit: &mut JITState, - ctx: &mut Context, asm: &mut Assembler, - ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, - block: Option<IseqPtr>, + block: Option<BlockHandler>, argc: i32, - known_recv_class: *const VALUE, + known_recv_class: Option<VALUE>, ) -> bool; +/// Methods for generating code for hardcoded (usually C) methods +static mut METHOD_CODEGEN_TABLE: Option<HashMap<usize, MethodGenFn>> = None; + +/// Register codegen functions for some Ruby core methods +pub fn yjit_reg_method_codegen_fns() { + unsafe { + assert!(METHOD_CODEGEN_TABLE.is_none()); + METHOD_CODEGEN_TABLE = Some(HashMap::default()); + + // Specialization for C methods. See the function's docs for details. 
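// A simplified standalone model of the codegen table populated below: each
// (class, method) pair is resolved once at boot to a stable key and mapped to a
// function that can emit specialized code, and call sites consult the map before
// falling back to the generic C-function call path. The string key used here is a
// stand-in for the method serial derived from the method entry; this is a sketch
// of the idea, not the actual registration code.
use std::collections::HashMap;

type ToyMethodGenFn = fn(argc: i32) -> bool; // returns true if it generated code

fn gen_obj_not(_argc: i32) -> bool { true } // stand-in for a specialized generator

struct ToyCodegenTable {
    table: HashMap<(&'static str, &'static str), ToyMethodGenFn>,
}

impl ToyCodegenTable {
    fn reg(&mut self, klass: &'static str, name: &'static str, gen_fn: ToyMethodGenFn) {
        self.table.insert((klass, name), gen_fn);
    }

    // Mirrors the idea of lookup_cfunc_codegen(): hand back a specialized generator
    // if one was registered, otherwise None means "use the normal cfunc call".
    fn lookup(&self, klass: &'static str, name: &'static str) -> Option<ToyMethodGenFn> {
        self.table.get(&(klass, name)).copied()
    }
}

fn main() {
    let mut table = ToyCodegenTable { table: HashMap::new() };
    table.reg("BasicObject", "!", gen_obj_not);
    assert!(table.lookup("BasicObject", "!").is_some());
    assert!(table.lookup("String", "upcase").is_none());
}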
+ reg_method_codegen(rb_cBasicObject, "!", jit_rb_obj_not); + + reg_method_codegen(rb_cNilClass, "nil?", jit_rb_true); + reg_method_codegen(rb_mKernel, "nil?", jit_rb_false); + reg_method_codegen(rb_mKernel, "is_a?", jit_rb_kernel_is_a); + reg_method_codegen(rb_mKernel, "kind_of?", jit_rb_kernel_is_a); + reg_method_codegen(rb_mKernel, "instance_of?", jit_rb_kernel_instance_of); + + reg_method_codegen(rb_cBasicObject, "==", jit_rb_obj_equal); + reg_method_codegen(rb_cBasicObject, "equal?", jit_rb_obj_equal); + reg_method_codegen(rb_cBasicObject, "!=", jit_rb_obj_not_equal); + reg_method_codegen(rb_mKernel, "eql?", jit_rb_obj_equal); + reg_method_codegen(rb_cModule, "==", jit_rb_obj_equal); + reg_method_codegen(rb_cModule, "===", jit_rb_mod_eqq); + reg_method_codegen(rb_cModule, "name", jit_rb_mod_name); + reg_method_codegen(rb_cSymbol, "==", jit_rb_obj_equal); + reg_method_codegen(rb_cSymbol, "===", jit_rb_obj_equal); + reg_method_codegen(rb_cInteger, "==", jit_rb_int_equal); + reg_method_codegen(rb_cInteger, "===", jit_rb_int_equal); + + reg_method_codegen(rb_cInteger, "succ", jit_rb_int_succ); + reg_method_codegen(rb_cInteger, "pred", jit_rb_int_pred); + reg_method_codegen(rb_cInteger, "/", jit_rb_int_div); + reg_method_codegen(rb_cInteger, "<<", jit_rb_int_lshift); + reg_method_codegen(rb_cInteger, ">>", jit_rb_int_rshift); + reg_method_codegen(rb_cInteger, "^", jit_rb_int_xor); + reg_method_codegen(rb_cInteger, "[]", jit_rb_int_aref); + + reg_method_codegen(rb_cFloat, "+", jit_rb_float_plus); + reg_method_codegen(rb_cFloat, "-", jit_rb_float_minus); + reg_method_codegen(rb_cFloat, "*", jit_rb_float_mul); + reg_method_codegen(rb_cFloat, "/", jit_rb_float_div); + + reg_method_codegen(rb_cString, "dup", jit_rb_str_dup); + reg_method_codegen(rb_cString, "empty?", jit_rb_str_empty_p); + reg_method_codegen(rb_cString, "to_s", jit_rb_str_to_s); + reg_method_codegen(rb_cString, "to_str", jit_rb_str_to_s); + reg_method_codegen(rb_cString, "length", jit_rb_str_length); + reg_method_codegen(rb_cString, "size", jit_rb_str_length); + reg_method_codegen(rb_cString, "bytesize", jit_rb_str_bytesize); + reg_method_codegen(rb_cString, "getbyte", jit_rb_str_getbyte); + reg_method_codegen(rb_cString, "setbyte", jit_rb_str_setbyte); + reg_method_codegen(rb_cString, "byteslice", jit_rb_str_byteslice); + reg_method_codegen(rb_cString, "[]", jit_rb_str_aref_m); + reg_method_codegen(rb_cString, "slice", jit_rb_str_aref_m); + reg_method_codegen(rb_cString, "<<", jit_rb_str_concat); + reg_method_codegen(rb_cString, "+@", jit_rb_str_uplus); + + reg_method_codegen(rb_cNilClass, "===", jit_rb_case_equal); + reg_method_codegen(rb_cTrueClass, "===", jit_rb_case_equal); + reg_method_codegen(rb_cFalseClass, "===", jit_rb_case_equal); + + reg_method_codegen(rb_cArray, "empty?", jit_rb_ary_empty_p); + reg_method_codegen(rb_cArray, "length", jit_rb_ary_length); + reg_method_codegen(rb_cArray, "size", jit_rb_ary_length); + reg_method_codegen(rb_cArray, "<<", jit_rb_ary_push); + + reg_method_codegen(rb_cHash, "empty?", jit_rb_hash_empty_p); + + reg_method_codegen(rb_mKernel, "respond_to?", jit_obj_respond_to); + reg_method_codegen(rb_mKernel, "block_given?", jit_rb_f_block_given_p); + reg_method_codegen(rb_mKernel, "dup", jit_rb_obj_dup); + + reg_method_codegen(rb_cClass, "superclass", jit_rb_class_superclass); + + reg_method_codegen(rb_singleton_class(rb_cThread), "current", jit_thread_s_current); + } +} + +/// Register a specialized codegen function for a particular method. 
Note that +/// if the function returns true, the code it generates runs without a +/// control frame and without interrupt checks, completely substituting the +/// original implementation of the method. To avoid creating observable +/// behavior changes, prefer targeting simple code paths that do not allocate +/// and do not make method calls. +/// +/// See also: [lookup_cfunc_codegen]. +fn reg_method_codegen(klass: VALUE, method_name: &str, gen_fn: MethodGenFn) { + let mid = unsafe { rb_intern2(method_name.as_ptr().cast(), method_name.len().try_into().unwrap()) }; + let me = unsafe { rb_method_entry_at(klass, mid) }; + + if me.is_null() { + panic!("undefined optimized method!: {method_name}"); + } + + // For now, only cfuncs are supported (me->cme cast fine since it's just me->def->type). + debug_assert_eq!(VM_METHOD_TYPE_CFUNC, unsafe { get_cme_def_type(me.cast()) }); + + let method_serial = unsafe { + let def = (*me).def; + get_def_method_serial(def) + }; + + unsafe { METHOD_CODEGEN_TABLE.as_mut().unwrap().insert(method_serial, gen_fn); } +} + +pub fn yjit_shutdown_free_codegen_table() { + unsafe { METHOD_CODEGEN_TABLE = None; }; +} + /// Global state needed for code generation pub struct CodegenGlobals { + /// Flat vector of bits to store compressed context data + context_data: BitVector, + /// Inline code block (fast path) inline_cb: CodeBlock, @@ -7314,36 +10941,31 @@ pub struct CodegenGlobals { /// Code for exiting back to the interpreter from the leave instruction leave_exit_code: CodePtr, + /// Code for exiting back to the interpreter after handling an exception + leave_exception_code: CodePtr, + // For exiting from YJIT frame from branch_stub_hit(). - // Filled by gen_code_for_exit_from_stub(). + // Filled by gen_stub_exit(). stub_exit_code: CodePtr, // For servicing branch stubs branch_stub_hit_trampoline: CodePtr, + // For servicing entry stubs + entry_stub_hit_trampoline: CodePtr, + // Code for full logic of returning from C method and exiting to the interpreter outline_full_cfunc_return_pos: CodePtr, /// For implementing global code invalidation global_inval_patches: Vec<CodepagePatch>, - /// For implementing global code invalidation. The number of bytes counting from the beginning - /// of the inline code block that should not be changed. After patching for global invalidation, - /// no one should make changes to the invalidated code region anymore. This is used to - /// break out of invalidation race when there are multiple ractors. - inline_frozen_bytes: usize, - - // Methods for generating code for hardcoded (usually C) methods - method_codegen_table: HashMap<usize, MethodGenFn>, - /// Page indexes for outlined code that are not associated to any ISEQ. ocb_pages: Vec<usize>, - /// Freed page indexes. None if code GC has not been used. - freed_pages: Option<Vec<usize>>, - - /// How many times code GC has been executed. - code_gc_count: usize, + /// Map of cfunc YARV PCs to CMEs and receiver indexes, used to lazily push + /// a frame when rb_yjit_lazy_push_frame() is called with a PC in this HashMap. + pc_to_cfunc: HashMap<*mut VALUE, (*const rb_callable_method_entry_t, u8)>, } /// For implementing global code invalidation. 
A position in the inline @@ -7361,15 +10983,11 @@ impl CodegenGlobals { /// Initialize the codegen globals pub fn init() { // Executable memory and code page size in bytes - let mem_size = get_option!(exec_mem_size); - + let exec_mem_size = get_option!(exec_mem_size).unwrap_or(get_option!(mem_size)); #[cfg(not(test))] let (mut cb, mut ocb) = { - use std::cell::RefCell; - use std::rc::Rc; - - let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size as u32) }; + let virt_block: *mut u8 = unsafe { rb_jit_reserve_addr_space(exec_mem_size as u32) }; // Memory protection syscalls need page-aligned addresses, so check it here. Assuming // `virt_block` is page-aligned, `second_half` should be page-aligned as long as the @@ -7378,7 +10996,7 @@ impl CodegenGlobals { // // Basically, we don't support x86-64 2MiB and 1GiB pages. ARMv8 can do up to 64KiB // (2¹⁶ bytes) pages, which should be fine. 4KiB pages seem to be the most popular though. - let page_size = unsafe { rb_yjit_get_page_size() }; + let page_size = unsafe { rb_jit_get_page_size() }; assert_eq!( virt_block as usize % page_size.as_usize(), 0, "Start of virtual address block should be page-aligned", @@ -7391,14 +11009,16 @@ impl CodegenGlobals { SystemAllocator {}, page_size, NonNull::new(virt_block).unwrap(), - mem_size, + exec_mem_size, + get_option!(mem_size), ); - let mem_block = Rc::new(RefCell::new(mem_block)); + let mem_block = Rc::new(mem_block); - let cb = CodeBlock::new(mem_block.clone(), false); - let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, true)); + let freed_pages = Rc::new(None); - assert_eq!(cb.page_size() % page_size.as_usize(), 0, "code page size is not page-aligned"); + let asm_comments = get_option_ref!(dump_disasm).is_some(); + let cb = CodeBlock::new(mem_block.clone(), false, freed_pages.clone(), asm_comments); + let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, true, freed_pages, asm_comments)); (cb, ocb) }; @@ -7406,114 +11026,49 @@ impl CodegenGlobals { // In test mode we're not linking with the C code // so we don't allocate executable memory #[cfg(test)] - let mut cb = CodeBlock::new_dummy(mem_size / 2); + let mut cb = CodeBlock::new_dummy(exec_mem_size / 2); #[cfg(test)] - let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(mem_size / 2)); + let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(exec_mem_size / 2)); let ocb_start_addr = ocb.unwrap().get_write_ptr(); - let leave_exit_code = gen_leave_exit(&mut ocb); + let leave_exit_code = gen_leave_exit(&mut ocb).unwrap(); + let leave_exception_code = gen_leave_exception(&mut ocb).unwrap(); - let stub_exit_code = gen_code_for_exit_from_stub(&mut ocb); + let stub_exit_code = gen_stub_exit(&mut ocb).unwrap(); - let branch_stub_hit_trampoline = gen_branch_stub_hit_trampoline(&mut ocb); + let branch_stub_hit_trampoline = gen_branch_stub_hit_trampoline(&mut ocb).unwrap(); + let entry_stub_hit_trampoline = gen_entry_stub_hit_trampoline(&mut ocb).unwrap(); // Generate full exit code for C func - let cfunc_exit_code = gen_full_cfunc_return(&mut ocb); + let cfunc_exit_code = gen_full_cfunc_return(&mut ocb).unwrap(); let ocb_end_addr = ocb.unwrap().get_write_ptr(); - let ocb_pages = ocb.unwrap().addrs_to_pages(ocb_start_addr, ocb_end_addr); + let ocb_pages = ocb.unwrap().addrs_to_pages(ocb_start_addr, ocb_end_addr).collect(); // Mark all code memory as executable cb.mark_all_executable(); - ocb.unwrap().mark_all_executable(); - let mut codegen_globals = CodegenGlobals { + let codegen_globals = CodegenGlobals { + context_data: BitVector::new(), 
inline_cb: cb, outlined_cb: ocb, + ocb_pages, leave_exit_code, - stub_exit_code: stub_exit_code, + leave_exception_code, + stub_exit_code, outline_full_cfunc_return_pos: cfunc_exit_code, branch_stub_hit_trampoline, + entry_stub_hit_trampoline, global_inval_patches: Vec::new(), - inline_frozen_bytes: 0, - method_codegen_table: HashMap::new(), - ocb_pages, - freed_pages: None, - code_gc_count: 0, + pc_to_cfunc: HashMap::new(), }; - // Register the method codegen functions - codegen_globals.reg_method_codegen_fns(); - // Initialize the codegen globals instance unsafe { CODEGEN_GLOBALS = Some(codegen_globals); } } - // Register a specialized codegen function for a particular method. Note that - // the if the function returns true, the code it generates runs without a - // control frame and without interrupt checks. To avoid creating observable - // behavior changes, the codegen function should only target simple code paths - // that do not allocate and do not make method calls. - fn yjit_reg_method(&mut self, klass: VALUE, mid_str: &str, gen_fn: MethodGenFn) { - let id_string = std::ffi::CString::new(mid_str).expect("couldn't convert to CString!"); - let mid = unsafe { rb_intern(id_string.as_ptr()) }; - let me = unsafe { rb_method_entry_at(klass, mid) }; - - if me.is_null() { - panic!("undefined optimized method!"); - } - - // For now, only cfuncs are supported - //RUBY_ASSERT(me && me->def); - //RUBY_ASSERT(me->def->type == VM_METHOD_TYPE_CFUNC); - - let method_serial = unsafe { - let def = (*me).def; - get_def_method_serial(def) - }; - - self.method_codegen_table.insert(method_serial, gen_fn); - } - - /// Register codegen functions for some Ruby core methods - fn reg_method_codegen_fns(&mut self) { - unsafe { - // Specialization for C methods. See yjit_reg_method() for details. 
- self.yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not); - - self.yjit_reg_method(rb_cNilClass, "nil?", jit_rb_true); - self.yjit_reg_method(rb_mKernel, "nil?", jit_rb_false); - - self.yjit_reg_method(rb_cBasicObject, "==", jit_rb_obj_equal); - self.yjit_reg_method(rb_cBasicObject, "equal?", jit_rb_obj_equal); - self.yjit_reg_method(rb_mKernel, "eql?", jit_rb_obj_equal); - self.yjit_reg_method(rb_cModule, "==", jit_rb_obj_equal); - self.yjit_reg_method(rb_cSymbol, "==", jit_rb_obj_equal); - self.yjit_reg_method(rb_cSymbol, "===", jit_rb_obj_equal); - self.yjit_reg_method(rb_cInteger, "==", jit_rb_int_equal); - self.yjit_reg_method(rb_cInteger, "===", jit_rb_int_equal); - - // rb_str_to_s() methods in string.c - self.yjit_reg_method(rb_cString, "empty?", jit_rb_str_empty); - self.yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s); - self.yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s); - self.yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize); - self.yjit_reg_method(rb_cString, "<<", jit_rb_str_concat); - self.yjit_reg_method(rb_cString, "+@", jit_rb_str_uplus); - - self.yjit_reg_method(rb_mKernel, "respond_to?", jit_obj_respond_to); - - // Thread.current - self.yjit_reg_method( - rb_singleton_class(rb_cThread), - "current", - jit_thread_s_current, - ); - } - } - /// Get a mutable reference to the codegen globals instance pub fn get_instance() -> &'static mut CodegenGlobals { unsafe { CODEGEN_GLOBALS.as_mut().unwrap() } @@ -7523,6 +11078,11 @@ impl CodegenGlobals { unsafe { CODEGEN_GLOBALS.as_mut().is_some() } } + /// Get a mutable reference to the context data + pub fn get_context_data() -> &'static mut BitVector { + &mut CodegenGlobals::get_instance().context_data + } + /// Get a mutable reference to the inline code block pub fn get_inline_cb() -> &'static mut CodeBlock { &mut CodegenGlobals::get_instance().inline_cb @@ -7537,14 +11097,26 @@ impl CodegenGlobals { CodegenGlobals::get_instance().leave_exit_code } + pub fn get_leave_exception_code() -> CodePtr { + CodegenGlobals::get_instance().leave_exception_code + } + pub fn get_stub_exit_code() -> CodePtr { CodegenGlobals::get_instance().stub_exit_code } - pub fn push_global_inval_patch(i_pos: CodePtr, o_pos: CodePtr) { + pub fn push_global_inval_patch(inline_pos: CodePtr, outlined_pos: CodePtr, cb: &CodeBlock) { + if let Some(last_patch) = CodegenGlobals::get_instance().global_inval_patches.last() { + let patch_offset = inline_pos.as_offset() - last_patch.inline_patch_pos.as_offset(); + assert!( + patch_offset < 0 || cb.jmp_ptr_bytes() as i64 <= patch_offset, + "patches should not overlap (patch_offset: {patch_offset})", + ); + } + let patch = CodepagePatch { - inline_patch_pos: i_pos, - outlined_target_pos: o_pos, + inline_patch_pos: inline_pos, + outlined_target_pos: outlined_pos, }; CodegenGlobals::get_instance() .global_inval_patches @@ -7557,14 +11129,6 @@ impl CodegenGlobals { mem::take(&mut globals.global_inval_patches) } - pub fn get_inline_frozen_bytes() -> usize { - CodegenGlobals::get_instance().inline_frozen_bytes - } - - pub fn set_inline_frozen_bytes(frozen_bytes: usize) { - CodegenGlobals::get_instance().inline_frozen_bytes = frozen_bytes; - } - pub fn get_outline_full_cfunc_return_pos() -> CodePtr { CodegenGlobals::get_instance().outline_full_cfunc_return_pos } @@ -7573,31 +11137,16 @@ impl CodegenGlobals { CodegenGlobals::get_instance().branch_stub_hit_trampoline } - pub fn look_up_codegen_method(method_serial: usize) -> Option<MethodGenFn> { - let table = 
&CodegenGlobals::get_instance().method_codegen_table; - - let option_ref = table.get(&method_serial); - match option_ref { - None => None, - Some(&mgf) => Some(mgf), // Deref - } + pub fn get_entry_stub_hit_trampoline() -> CodePtr { + CodegenGlobals::get_instance().entry_stub_hit_trampoline } pub fn get_ocb_pages() -> &'static Vec<usize> { &CodegenGlobals::get_instance().ocb_pages } - pub fn get_freed_pages() -> &'static mut Option<Vec<usize>> { - &mut CodegenGlobals::get_instance().freed_pages - } - - pub fn set_freed_pages(freed_pages: Vec<usize>) { - CodegenGlobals::get_instance().freed_pages = Some(freed_pages); - CodegenGlobals::get_instance().code_gc_count += 1; - } - - pub fn get_code_gc_count() -> usize { - CodegenGlobals::get_instance().code_gc_count + pub fn get_pc_to_cfunc() -> &'static mut HashMap<*mut VALUE, (*const rb_callable_method_entry_t, u8)> { + &mut CodegenGlobals::get_instance().pc_to_cfunc } } @@ -7605,22 +11154,28 @@ impl CodegenGlobals { mod tests { use super::*; - fn setup_codegen() -> (JITState, Context, Assembler, CodeBlock, OutlinedCb) { - let blockid = BlockId { - iseq: ptr::null(), - idx: 0, - }; - let block = Block::new(blockid, &Context::default()); + fn setup_codegen() -> (Context, Assembler, CodeBlock, OutlinedCb) { + let cb = CodeBlock::new_dummy(256 * 1024); return ( - JITState::new(&block), Context::default(), - Assembler::new(), - CodeBlock::new_dummy(256 * 1024), + Assembler::new(0), + cb, OutlinedCb::wrap(CodeBlock::new_dummy(256 * 1024)), ); } + fn dummy_jit_state<'a>(cb: &mut CodeBlock, ocb: &'a mut OutlinedCb) -> JITState<'a> { + JITState::new( + BlockId { iseq: std::ptr::null(), idx: 0 }, + Context::default(), + cb.get_write_ptr(), + ptr::null(), // No execution context in tests. No peeking! + ocb, + true, + ) + } + #[test] fn test_gen_leave_exit() { let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(256 * 1024)); @@ -7630,250 +11185,249 @@ mod tests { #[test] fn test_gen_exit() { - let (_, ctx, mut asm, mut cb, _) = setup_codegen(); - gen_exit(0 as *mut VALUE, &ctx, &mut asm); - asm.compile(&mut cb); + let (_ctx, mut asm, mut cb, _) = setup_codegen(); + gen_exit(0 as *mut VALUE, &mut asm); + asm.compile(&mut cb, None).unwrap(); assert!(cb.get_write_pos() > 0); } #[test] fn test_get_side_exit() { - let (mut jit, ctx, _, _, mut ocb) = setup_codegen(); - get_side_exit(&mut jit, &mut ocb, &ctx); + let (ctx, mut asm, _, mut ocb) = setup_codegen(); + let side_exit_context = SideExitContext::new(0 as _, ctx); + asm.get_side_exit(&side_exit_context, None, &mut ocb); assert!(ocb.unwrap().get_write_pos() > 0); } #[test] fn test_gen_check_ints() { - let (_, _ctx, mut asm, _cb, mut ocb) = setup_codegen(); - let side_exit = ocb.unwrap().get_write_ptr().as_side_exit(); - gen_check_ints(&mut asm, side_exit); + let (_ctx, mut asm, _cb, _ocb) = setup_codegen(); + asm.set_side_exit_context(0 as _, 0); + gen_check_ints(&mut asm, Counter::guard_send_interrupted); } #[test] fn test_gen_nop() { - let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); - let status = gen_nop(&mut jit, &mut context, &mut asm, &mut ocb); - asm.compile(&mut cb); + let (context, mut asm, mut cb, mut ocb) = setup_codegen(); + let mut jit = dummy_jit_state(&mut cb, &mut ocb); + let status = gen_nop(&mut jit, &mut asm); + asm.compile(&mut cb, None).unwrap(); - assert_eq!(status, KeepCompiling); - assert_eq!(context.diff(&Context::default()), 0); + assert_eq!(status, Some(KeepCompiling)); + assert_eq!(context.diff(&Context::default()), TypeDiff::Compatible(0)); 
assert_eq!(cb.get_write_pos(), 0); } #[test] fn test_gen_pop() { - let (mut jit, _, mut asm, _cb, mut ocb) = setup_codegen(); - let mut context = Context::default(); - context.stack_push(Type::Fixnum); - let status = gen_pop(&mut jit, &mut context, &mut asm, &mut ocb); + let (_, mut asm, mut cb, mut ocb) = setup_codegen(); + let mut jit = dummy_jit_state(&mut cb, &mut ocb); + let context = Context::default(); + asm.stack_push(Type::Fixnum); + let status = gen_pop(&mut jit, &mut asm); - assert_eq!(status, KeepCompiling); - assert_eq!(context.diff(&Context::default()), 0); + assert_eq!(status, Some(KeepCompiling)); + let mut default = Context::default(); + default.set_reg_mapping(context.get_reg_mapping()); + assert_eq!(context.diff(&default), TypeDiff::Compatible(0)); } #[test] fn test_gen_dup() { - let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); - context.stack_push(Type::Fixnum); - let status = gen_dup(&mut jit, &mut context, &mut asm, &mut ocb); + let (_context, mut asm, mut cb, mut ocb) = setup_codegen(); + let mut jit = dummy_jit_state(&mut cb, &mut ocb); + asm.stack_push(Type::Fixnum); + let status = gen_dup(&mut jit, &mut asm); - assert_eq!(status, KeepCompiling); + assert_eq!(status, Some(KeepCompiling)); // Did we duplicate the type information for the Fixnum type? - assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(0))); - assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(1))); + assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(0))); + assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(1))); - asm.compile(&mut cb); + asm.compile(&mut cb, None).unwrap(); assert!(cb.get_write_pos() > 0); // Write some movs } #[test] fn test_gen_dupn() { - let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); - context.stack_push(Type::Fixnum); - context.stack_push(Type::Flonum); + let (_context, mut asm, mut cb, mut ocb) = setup_codegen(); + let mut jit = dummy_jit_state(&mut cb, &mut ocb); + asm.stack_push(Type::Fixnum); + asm.stack_push(Type::Flonum); let mut value_array: [u64; 2] = [0, 2]; // We only compile for n == 2 let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_dupn(&mut jit, &mut context, &mut asm, &mut ocb); + let status = gen_dupn(&mut jit, &mut asm); - assert_eq!(status, KeepCompiling); + assert_eq!(status, Some(KeepCompiling)); - assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(3))); - assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(2))); - assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(1))); - assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0))); + assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(3))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(2))); + assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(1))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(0))); // TODO: this is writing zero bytes on x86. Why? 
- asm.compile(&mut cb); + asm.compile(&mut cb, None).unwrap(); assert!(cb.get_write_pos() > 0); // Write some movs } #[test] - fn test_gen_swap() { - let (mut jit, mut context, mut asm, _cb, mut ocb) = setup_codegen(); - context.stack_push(Type::Fixnum); - context.stack_push(Type::Flonum); - - let status = gen_swap(&mut jit, &mut context, &mut asm, &mut ocb); - - let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); - let (_, tmp_type_next) = context.get_opnd_mapping(StackOpnd(1)); + fn test_gen_opt_reverse() { + let (_context, mut asm, mut cb, mut ocb) = setup_codegen(); + let mut jit = dummy_jit_state(&mut cb, &mut ocb); - assert_eq!(status, KeepCompiling); - assert_eq!(tmp_type_top, Type::Fixnum); - assert_eq!(tmp_type_next, Type::Flonum); - } - - #[test] - fn test_putnil() { - let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); - let status = gen_putnil(&mut jit, &mut context, &mut asm, &mut ocb); + // Odd number of elements + asm.stack_push(Type::Fixnum); + asm.stack_push(Type::Flonum); + asm.stack_push(Type::CString); - let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); + let mut value_array: [u64; 2] = [0, 3]; + let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; + jit.pc = pc; - assert_eq!(status, KeepCompiling); - assert_eq!(tmp_type_top, Type::Nil); - asm.compile(&mut cb); - assert!(cb.get_write_pos() > 0); - } + let mut status = gen_opt_reverse(&mut jit, &mut asm); - #[test] - fn test_putobject_qtrue() { - // Test gen_putobject with Qtrue - let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); + assert_eq!(status, Some(KeepCompiling)); - let mut value_array: [u64; 2] = [0, Qtrue.into()]; - let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; - jit.pc = pc; + assert_eq!(Type::CString, asm.ctx.get_opnd_type(StackOpnd(2))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(1))); + assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(0))); - let status = gen_putobject(&mut jit, &mut context, &mut asm, &mut ocb); + // Try again with an even number of elements. 
+ asm.stack_push(Type::Nil); + value_array[1] = 4; + status = gen_opt_reverse(&mut jit, &mut asm); - let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); + assert_eq!(status, Some(KeepCompiling)); - assert_eq!(status, KeepCompiling); - assert_eq!(tmp_type_top, Type::True); - asm.compile(&mut cb); - assert!(cb.get_write_pos() > 0); + assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(3))); + assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(2))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(1))); + assert_eq!(Type::CString, asm.ctx.get_opnd_type(StackOpnd(0))); } #[test] - fn test_putobject_fixnum() { - // Test gen_putobject with a Fixnum to test another conditional branch - let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); - - // The Fixnum 7 is encoded as 7 * 2 + 1, or 15 - let mut value_array: [u64; 2] = [0, 15]; - let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; - jit.pc = pc; + fn test_gen_swap() { + let (_context, mut asm, mut cb, mut ocb) = setup_codegen(); + let mut jit = dummy_jit_state(&mut cb, &mut ocb); + asm.stack_push(Type::Fixnum); + asm.stack_push(Type::Flonum); - let status = gen_putobject(&mut jit, &mut context, &mut asm, &mut ocb); + let status = gen_swap(&mut jit, &mut asm); - let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); + let tmp_type_top = asm.ctx.get_opnd_type(StackOpnd(0)); + let tmp_type_next = asm.ctx.get_opnd_type(StackOpnd(1)); - assert_eq!(status, KeepCompiling); + assert_eq!(status, Some(KeepCompiling)); assert_eq!(tmp_type_top, Type::Fixnum); - asm.compile(&mut cb); - assert!(cb.get_write_pos() > 0); + assert_eq!(tmp_type_next, Type::Flonum); } #[test] - fn test_int2fix() { - let (mut jit, mut context, mut asm, _cb, mut ocb) = setup_codegen(); - jit.opcode = YARVINSN_putobject_INT2FIX_0_.as_usize(); - let status = gen_putobject_int2fix(&mut jit, &mut context, &mut asm, &mut ocb); + fn test_putnil() { + let (_context, mut asm, mut cb, mut ocb) = setup_codegen(); + let mut jit = dummy_jit_state(&mut cb, &mut ocb); + let status = gen_putnil(&mut jit, &mut asm); - let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); + let tmp_type_top = asm.ctx.get_opnd_type(StackOpnd(0)); - // Right now we're not testing the generated machine code to make sure a literal 1 or 0 was pushed. I've checked locally. 
- assert_eq!(status, KeepCompiling); - assert_eq!(tmp_type_top, Type::Fixnum); + assert_eq!(status, Some(KeepCompiling)); + assert_eq!(tmp_type_top, Type::Nil); + asm.compile(&mut cb, None).unwrap(); + assert!(cb.get_write_pos() > 0); } + #[test] fn test_putself() { - let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); - let status = gen_putself(&mut jit, &mut context, &mut asm, &mut ocb); + let (_context, mut asm, mut cb, mut ocb) = setup_codegen(); + let mut jit = dummy_jit_state(&mut cb, &mut ocb); + let status = gen_putself(&mut jit, &mut asm); - assert_eq!(status, KeepCompiling); - asm.compile(&mut cb); + assert_eq!(status, Some(KeepCompiling)); + asm.compile(&mut cb, None).unwrap(); assert!(cb.get_write_pos() > 0); } #[test] fn test_gen_setn() { - let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); - context.stack_push(Type::Fixnum); - context.stack_push(Type::Flonum); - context.stack_push(Type::CString); + let (_context, mut asm, mut cb, mut ocb) = setup_codegen(); + let mut jit = dummy_jit_state(&mut cb, &mut ocb); + asm.stack_push(Type::Fixnum); + asm.stack_push(Type::Flonum); + asm.stack_push(Type::CString); let mut value_array: [u64; 2] = [0, 2]; let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_setn(&mut jit, &mut context, &mut asm, &mut ocb); + let status = gen_setn(&mut jit, &mut asm); - assert_eq!(status, KeepCompiling); + assert_eq!(status, Some(KeepCompiling)); - assert_eq!(Type::CString, context.get_opnd_type(StackOpnd(2))); - assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(1))); - assert_eq!(Type::CString, context.get_opnd_type(StackOpnd(0))); + assert_eq!(Type::CString, asm.ctx.get_opnd_type(StackOpnd(2))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(1))); + assert_eq!(Type::CString, asm.ctx.get_opnd_type(StackOpnd(0))); - asm.compile(&mut cb); + asm.compile(&mut cb, None).unwrap(); assert!(cb.get_write_pos() > 0); } #[test] fn test_gen_topn() { - let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); - context.stack_push(Type::Flonum); - context.stack_push(Type::CString); + let (_context, mut asm, mut cb, mut ocb) = setup_codegen(); + let mut jit = dummy_jit_state(&mut cb, &mut ocb); + asm.stack_push(Type::Flonum); + asm.stack_push(Type::CString); let mut value_array: [u64; 2] = [0, 1]; let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_topn(&mut jit, &mut context, &mut asm, &mut ocb); + let status = gen_topn(&mut jit, &mut asm); - assert_eq!(status, KeepCompiling); + assert_eq!(status, Some(KeepCompiling)); - assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(2))); - assert_eq!(Type::CString, context.get_opnd_type(StackOpnd(1))); - assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(2))); + assert_eq!(Type::CString, asm.ctx.get_opnd_type(StackOpnd(1))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(0))); - asm.compile(&mut cb); + asm.compile(&mut cb, None).unwrap(); assert!(cb.get_write_pos() > 0); // Write some movs } #[test] fn test_gen_adjuststack() { - let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); - context.stack_push(Type::Flonum); - context.stack_push(Type::CString); - context.stack_push(Type::Fixnum); + let (_context, mut asm, mut cb, mut ocb) = setup_codegen(); + let mut jit = dummy_jit_state(&mut cb, &mut ocb); + asm.stack_push(Type::Flonum); + asm.stack_push(Type::CString); 
+ asm.stack_push(Type::Fixnum); let mut value_array: [u64; 3] = [0, 2, 0]; let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_adjuststack(&mut jit, &mut context, &mut asm, &mut ocb); + let status = gen_adjuststack(&mut jit, &mut asm); - assert_eq!(status, KeepCompiling); + assert_eq!(status, Some(KeepCompiling)); - assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(0))); - asm.compile(&mut cb); + asm.compile(&mut cb, None).unwrap(); assert!(cb.get_write_pos() == 0); // No instructions written } #[test] fn test_gen_leave() { - let (mut jit, mut context, mut asm, _cb, mut ocb) = setup_codegen(); + let (_context, mut asm, mut cb, mut ocb) = setup_codegen(); + let mut jit = dummy_jit_state(&mut cb, &mut ocb); // Push return value - context.stack_push(Type::Fixnum); - gen_leave(&mut jit, &mut context, &mut asm, &mut ocb); + asm.stack_push(Type::Fixnum); + asm.set_side_exit_context(0 as _, 0); + gen_leave(&mut jit, &mut asm); } } diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 15b8fe4466..0590135392 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -1,3 +1,8 @@ +//! Code versioning, retained live control flow graph mutations, type tracking, etc. + +// So we can comment on individual uses of `unsafe` in `unsafe` functions +#![warn(unsafe_op_in_unsafe_fn)] + use crate::asm::*; use crate::backend::ir::*; use crate::codegen::*; @@ -10,24 +15,36 @@ use crate::utils::*; use crate::disasm::*; use core::ffi::c_void; use std::cell::*; +use std::fmt; +use std::mem; +use std::mem::transmute; +use std::ops::Range; +use std::rc::Rc; use std::collections::HashSet; +use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; -use std::mem; -use std::rc::{Rc}; +use mem::MaybeUninit; +use std::ptr; +use ptr::NonNull; use YARVOpnd::*; use TempMapping::*; -use crate::invariants::block_assumptions_free; +use crate::invariants::*; -// Maximum number of temp value types we keep track of -pub const MAX_TEMP_TYPES: usize = 8; +// Maximum number of temp value types or registers we keep track of +pub const MAX_CTX_TEMPS: usize = 8; -// Maximum number of local variable types we keep track of -const MAX_LOCAL_TYPES: usize = 8; +// Maximum number of local variable types or registers we keep track of +const MAX_CTX_LOCALS: usize = 8; + +/// An index into `ISEQ_BODY(iseq)->iseq_encoded`. Points +/// to a YARV instruction or an instruction operand. 
+pub type IseqIdx = u16; // Represent the type of a value (local/stack/self) in YJIT -#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)] +#[repr(u8)] pub enum Type { - Unknown, + Unknown = 0, UnknownImm, UnknownHeap, Nil, @@ -35,19 +52,20 @@ pub enum Type { False, Fixnum, Flonum, - Hash, ImmSymbol, - #[allow(unused)] - HeapSymbol, - TString, // An object with the T_STRING flag set, possibly an rb_cString - CString, // An un-subclassed string of type rb_cString (can have instance vars in some cases) + CString, // An object that at one point had its class field equal rb_cString (creating a singleton class changes it) TArray, // An object with the T_ARRAY flag set, possibly an rb_cArray - CArray, // An un-subclassed string of type rb_cArray (can have instance vars in some cases) + CArray, // An object that at one point had its class field equal rb_cArray (creating a singleton class changes it) + THash, // An object with the T_HASH flag set, possibly an rb_cHash + CHash, // An object that at one point had its class field equal rb_cHash (creating a singleton class changes it) BlockParamProxy, // A special sentinel value indicating the block parameter should be read from // the current surrounding cfp + + // The context currently relies on types taking at most 4 bits (max value 15) + // to encode, so if we add any more, we will need to refactor the context. } // Default initialization @@ -80,12 +98,11 @@ impl Type { // Core.rs can't reference rb_cString because it's linked by Rust-only tests. // But CString vs TString is only an optimisation and shouldn't affect correctness. #[cfg(not(test))] - if val.class_of() == unsafe { rb_cString } { - return Type::CString; - } - #[cfg(not(test))] - if val.class_of() == unsafe { rb_cArray } { - return Type::CArray; + match val.class_of() { + class if class == unsafe { rb_cArray } => return Type::CArray, + class if class == unsafe { rb_cHash } => return Type::CHash, + class if class == unsafe { rb_cString } => return Type::CString, + _ => {} } // We likewise can't reference rb_block_param_proxy, but it's again an optimisation; // we can just treat it as a normal Object. 
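// Editorial worked example, not part of this commit (the VALUE names are
// hypothetical): with the class_of() fast path above, an instance whose class
// is exactly rb_cString maps to Type::CString, while an instance of a String
// subclass falls through to the builtin_type() match below and becomes
// Type::TString:
//
//     assert_eq!(Type::from(plain_string_val), Type::CString);
//     assert_eq!(Type::from(string_subclass_val), Type::TString);
//
// Both results still satisfy Type::is_string(), so guards that only need
// "some T_STRING" accept either.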
@@ -95,7 +112,7 @@ impl Type { } match val.builtin_type() { RUBY_T_ARRAY => Type::TArray, - RUBY_T_HASH => Type::Hash, + RUBY_T_HASH => Type::THash, RUBY_T_STRING => Type::TString, _ => Type::UnknownHeap, } @@ -137,14 +154,30 @@ impl Type { Type::UnknownHeap => true, Type::TArray => true, Type::CArray => true, - Type::Hash => true, - Type::HeapSymbol => true, + Type::THash => true, + Type::CHash => true, Type::TString => true, Type::CString => true, + Type::BlockParamProxy => true, _ => false, } } + /// Check if it's a T_ARRAY object (both TArray and CArray are T_ARRAY) + pub fn is_array(&self) -> bool { + matches!(self, Type::TArray | Type::CArray) + } + + /// Check if it's a T_HASH object (both THash and CHash are T_HASH) + pub fn is_hash(&self) -> bool { + matches!(self, Type::THash | Type::CHash) + } + + /// Check if it's a T_STRING object (both TString and CString are T_STRING) + pub fn is_string(&self) -> bool { + matches!(self, Type::TString | Type::CString) + } + /// Returns an Option with the T_ value type if it is known, otherwise None pub fn known_value_type(&self) -> Option<ruby_value_type> { match self { @@ -154,8 +187,8 @@ impl Type { Type::Fixnum => Some(RUBY_T_FIXNUM), Type::Flonum => Some(RUBY_T_FLOAT), Type::TArray | Type::CArray => Some(RUBY_T_ARRAY), - Type::Hash => Some(RUBY_T_HASH), - Type::ImmSymbol | Type::HeapSymbol => Some(RUBY_T_SYMBOL), + Type::THash | Type::CHash => Some(RUBY_T_HASH), + Type::ImmSymbol => Some(RUBY_T_SYMBOL), Type::TString | Type::CString => Some(RUBY_T_STRING), Type::Unknown | Type::UnknownImm | Type::UnknownHeap => None, Type::BlockParamProxy => None, @@ -171,9 +204,10 @@ impl Type { Type::False => Some(rb_cFalseClass), Type::Fixnum => Some(rb_cInteger), Type::Flonum => Some(rb_cFloat), - Type::ImmSymbol | Type::HeapSymbol => Some(rb_cSymbol), - Type::CString => Some(rb_cString), + Type::ImmSymbol => Some(rb_cSymbol), Type::CArray => Some(rb_cArray), + Type::CHash => Some(rb_cHash), + Type::CString => Some(rb_cString), _ => None, } } @@ -212,66 +246,83 @@ impl Type { } /// Compute a difference between two value types - /// Returns 0 if the two are the same - /// Returns > 0 if different but compatible - /// Returns usize::MAX if incompatible - pub fn diff(self, dst: Self) -> usize { + pub fn diff(self, dst: Self) -> TypeDiff { // Perfect match, difference is zero if self == dst { - return 0; + return TypeDiff::Compatible(0); } // Any type can flow into an unknown type if dst == Type::Unknown { - return 1; - } - - // A CString is also a TString. - if self == Type::CString && dst == Type::TString { - return 1; + return TypeDiff::Compatible(1); } // A CArray is also a TArray. if self == Type::CArray && dst == Type::TArray { - return 1; + return TypeDiff::Compatible(1); + } + + // A CHash is also a THash. + if self == Type::CHash && dst == Type::THash { + return TypeDiff::Compatible(1); + } + + // A CString is also a TString. 
+ if self == Type::CString && dst == Type::TString { + return TypeDiff::Compatible(1); } // Specific heap type into unknown heap type is imperfect but valid if self.is_heap() && dst == Type::UnknownHeap { - return 1; + return TypeDiff::Compatible(1); } // Specific immediate type into unknown immediate type is imperfect but valid if self.is_imm() && dst == Type::UnknownImm { - return 1; + return TypeDiff::Compatible(1); } // Incompatible types - return usize::MAX; + return TypeDiff::Incompatible; } /// Upgrade this type into a more specific compatible type /// The new type must be compatible and at least as specific as the previously known type. - fn upgrade(&mut self, src: Self) { - // Here we're checking that src is more specific than self - assert!(src.diff(*self) != usize::MAX); - *self = src; + fn upgrade(&mut self, new_type: Self) { + // We can only upgrade to a type that is more specific + assert!(new_type.diff(*self) != TypeDiff::Incompatible); + *self = new_type; } } -// Potential mapping of a value on the temporary stack to -// self, a local variable or constant so that we can track its type -#[derive(Copy, Clone, Eq, PartialEq, Debug)] +#[derive(Debug, Eq, PartialEq)] +pub enum TypeDiff { + // usize == 0: Same type + // usize >= 1: Different but compatible. The smaller, the more compatible. + Compatible(usize), + Incompatible, +} + +#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)] pub enum TempMapping { - MapToStack, // Normal stack value - MapToSelf, // Temp maps to the self operand - MapToLocal(u8), // Temp maps to a local variable with index - //ConstMapping, // Small constant (0, 1, 2, Qnil, Qfalse, Qtrue) + MapToStack(Type), + MapToSelf, + MapToLocal(u8), } impl Default for TempMapping { fn default() -> Self { - MapToStack + TempMapping::MapToStack(Type::default()) + } +} + +impl TempMapping { + /// Return TempMapping without type information in MapToStack + pub fn without_type(&self) -> TempMapping { + match self { + MapToStack(_) => TempMapping::MapToStack(Type::default()), + _ => *self, + } } } @@ -282,35 +333,883 @@ pub enum YARVOpnd { SelfOpnd, // Temporary stack operand with stack index - StackOpnd(u16), + StackOpnd(u8), +} + +impl From<Opnd> for YARVOpnd { + fn from(value: Opnd) -> Self { + match value { + Opnd::Stack { idx, .. } => StackOpnd(idx.try_into().unwrap()), + _ => unreachable!("{:?} cannot be converted to YARVOpnd", value) + } + } +} + +/// Number of registers that can be used for stack temps or locals +pub const MAX_MAPPED_REGS: usize = 5; + +/// A stack slot or a local variable. u8 represents the index of it (<= 8). +#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)] +pub enum RegOpnd { + Stack(u8), + Local(u8), +} + +/// RegMappings manages a set of registers used for stack temps and locals. +/// Each element of the array represents each of the registers. +/// If an element is Some, the stack temp or the local uses a register. +/// +/// Note that Opnd::InsnOut uses a separate set of registers at the moment. +#[derive(Copy, Clone, Default, Eq, Hash, PartialEq)] +pub struct RegMapping([Option<RegOpnd>; MAX_MAPPED_REGS]); + +impl RegMapping { + /// Return the index of the register for a given operand if allocated. + pub fn get_reg(&self, opnd: RegOpnd) -> Option<usize> { + self.0.iter().enumerate() + .find(|(_, ®_opnd)| reg_opnd == Some(opnd)) + .map(|(reg_idx, _)| reg_idx) + } + + /// Set a given operand to the register at a given index. 
+ pub fn set_reg(&mut self, opnd: RegOpnd, reg_idx: usize) { + assert!(self.0[reg_idx].is_none()); + self.0[reg_idx] = Some(opnd); + } + + /// Allocate a register for a given operand if available. + /// Return true if self is updated. + pub fn alloc_reg(&mut self, opnd: RegOpnd) -> bool { + // If a given opnd already has a register, skip allocation. + if self.get_reg(opnd).is_some() { + return false; + } + + // If the index is too large to encode with with 3 bits, give up. + match opnd { + RegOpnd::Stack(stack_idx) => if stack_idx >= MAX_CTX_TEMPS as u8 { + return false; + } + RegOpnd::Local(local_idx) => if local_idx >= MAX_CTX_LOCALS as u8 { + return false; + } + }; + + // Allocate a register if available. + if let Some(reg_idx) = self.find_unused_reg(opnd) { + self.0[reg_idx] = Some(opnd); + return true; + } + false + } + + /// Deallocate a register for a given operand if in use. + /// Return true if self is updated. + pub fn dealloc_reg(&mut self, opnd: RegOpnd) -> bool { + for reg_opnd in self.0.iter_mut() { + if *reg_opnd == Some(opnd) { + *reg_opnd = None; + return true; + } + } + false + } + + /// Find an available register and return the index of it. + fn find_unused_reg(&self, opnd: RegOpnd) -> Option<usize> { + let num_regs = get_option!(num_temp_regs); + if num_regs == 0 { + return None; + } + assert!(num_regs <= MAX_MAPPED_REGS); + + // If the default index for the operand is available, use that to minimize + // discrepancies among Contexts. + let default_idx = match opnd { + RegOpnd::Stack(stack_idx) => stack_idx.as_usize() % num_regs, + RegOpnd::Local(local_idx) => num_regs - (local_idx.as_usize() % num_regs) - 1, + }; + if self.0[default_idx].is_none() { + return Some(default_idx); + } + + // If not, pick any other available register. Like default indexes, prefer + // lower indexes for Stack, and higher indexes for Local. + let mut index_temps = self.0.iter().enumerate(); + match opnd { + RegOpnd::Stack(_) => index_temps.find(|(_, reg_opnd)| reg_opnd.is_none()), + RegOpnd::Local(_) => index_temps.rev().find(|(_, reg_opnd)| reg_opnd.is_none()), + }.map(|(index, _)| index) + } + + /// Return a vector of RegOpnds that have an allocated register + pub fn get_reg_opnds(&self) -> Vec<RegOpnd> { + self.0.iter().filter_map(|®_opnd| reg_opnd).collect() + } + + /// Count the number of registers that store a different operand from `dst`. + pub fn diff(&self, dst: RegMapping) -> usize { + self.0.iter().enumerate().filter(|&(reg_idx, ®)| reg != dst.0[reg_idx]).count() + } } +impl fmt::Debug for RegMapping { + /// Print `[None, ...]` instead of the default `RegMappings([None, ...])` + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{:?}", self.0) + } +} + +/// Maximum value of the chain depth (should fit in 5 bits) +const CHAIN_DEPTH_MAX: u8 = 0b11111; // 31 + /// Code generation context /// Contains information we can use to specialize/optimize code -/// There are a lot of context objects so we try to keep the size small. 
-#[derive(Clone, Default, PartialEq, Debug)] +#[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)] pub struct Context { // Number of values currently on the temporary stack - stack_size: u16, + stack_size: u8, // Offset of the JIT SP relative to the interpreter SP // This represents how far the JIT's SP is from the "real" SP - sp_offset: i16, + sp_offset: i8, + + /// Which stack temps or locals are in a register + reg_mapping: RegMapping, // Depth of this block in the sidechain (eg: inline-cache chain) + // 6 bits, max 63 chain_depth: u8, - // Local variable types we keep track of - local_types: [Type; MAX_LOCAL_TYPES], + // Whether this code is the target of a JIT-to-JIT Ruby return ([Self::is_return_landing]) + is_return_landing: bool, - // Temporary variable types we keep track of - temp_types: [Type; MAX_TEMP_TYPES], + // Whether the compilation of this code has been deferred ([Self::is_deferred]) + is_deferred: bool, // Type we track for self self_type: Type, - // Mapping of temp stack entries to types we track - temp_mapping: [TempMapping; MAX_TEMP_TYPES], + // Local variable types we keep track of + local_types: [Type; MAX_CTX_LOCALS], + + // Temp mapping type/local_idx we track + temp_mapping: [TempMapping; MAX_CTX_TEMPS], + + /// A pointer to a block ISEQ supplied by the caller. 0 if not inlined. + inline_block: Option<IseqPtr>, +} + +#[derive(Clone)] +pub struct BitVector { + // Flat vector of bytes to write into + bytes: Vec<u8>, + + // Number of bits taken out of bytes allocated + num_bits: usize, +} + +impl BitVector { + pub fn new() -> Self { + Self { + bytes: Vec::with_capacity(4096), + num_bits: 0, + } + } + + #[allow(unused)] + pub fn num_bits(&self) -> usize { + self.num_bits + } + + // Total number of bytes taken + #[allow(unused)] + pub fn num_bytes(&self) -> usize { + (self.num_bits / 8) + if (self.num_bits % 8) != 0 { 1 } else { 0 } + } + + // Write/append an unsigned integer value + fn push_uint(&mut self, mut val: u64, mut num_bits: usize) { + assert!(num_bits <= 64); + + // Mask out bits above the number of bits requested + let mut val_bits = val; + if num_bits < 64 { + val_bits &= (1 << num_bits) - 1; + assert!(val == val_bits); + } + + // Number of bits encoded in the last byte + let rem_bits = self.num_bits % 8; + + // Encode as many bits as we can in this last byte + if rem_bits != 0 { + let num_enc = std::cmp::min(num_bits, 8 - rem_bits); + let bit_mask = (1 << num_enc) - 1; + let frac_bits = (val & bit_mask) << rem_bits; + let frac_bits: u8 = frac_bits.try_into().unwrap(); + let last_byte_idx = self.bytes.len() - 1; + self.bytes[last_byte_idx] |= frac_bits; + + self.num_bits += num_enc; + num_bits -= num_enc; + val >>= num_enc; + } + + // While we have bits left to encode + while num_bits > 0 { + // Grow with a 1.2x growth factor instead of 2x + assert!(self.num_bits % 8 == 0); + let num_bytes = self.num_bits / 8; + if num_bytes == self.bytes.capacity() { + self.bytes.reserve_exact(self.bytes.len() / 5); + } + + let bits = val & 0xFF; + let bits: u8 = bits.try_into().unwrap(); + self.bytes.push(bits); + + let bits_to_encode = std::cmp::min(num_bits, 8); + self.num_bits += bits_to_encode; + num_bits -= bits_to_encode; + val >>= bits_to_encode; + } + } + + fn push_u8(&mut self, val: u8) { + self.push_uint(val as u64, 8); + } + + fn push_u5(&mut self, val: u8) { + assert!(val <= 0b11111); + self.push_uint(val as u64, 5); + } + + fn push_u4(&mut self, val: u8) { + assert!(val <= 0b1111); + self.push_uint(val as u64, 4); + } + + fn push_u3(&mut self, val: 
u8) { + assert!(val <= 0b111); + self.push_uint(val as u64, 3); + } + + fn push_u2(&mut self, val: u8) { + assert!(val <= 0b11); + self.push_uint(val as u64, 2); + } + + fn push_u1(&mut self, val: u8) { + assert!(val <= 0b1); + self.push_uint(val as u64, 1); + } + + fn push_bool(&mut self, val: bool) { + self.push_u1(if val { 1 } else { 0 }); + } + + // Push a context encoding opcode + fn push_op(&mut self, op: CtxOp) { + self.push_u4(op as u8); + } + + // Read a uint value at a given bit index + // The bit index is incremented after the value is read + fn read_uint(&self, bit_idx: &mut usize, mut num_bits: usize) -> u64 { + let start_bit_idx = *bit_idx; + let mut cur_idx = *bit_idx; + + // Read the bits in the first byte + let bit_mod = cur_idx % 8; + let bits_in_byte = self.bytes[cur_idx / 8] >> bit_mod; + + let num_bits_in_byte = std::cmp::min(num_bits, 8 - bit_mod); + cur_idx += num_bits_in_byte; + num_bits -= num_bits_in_byte; + + let mut out_bits = (bits_in_byte as u64) & ((1 << num_bits_in_byte) - 1); + + // While we have bits left to read + while num_bits > 0 { + let num_bits_in_byte = std::cmp::min(num_bits, 8); + assert!(cur_idx % 8 == 0); + let byte = self.bytes[cur_idx / 8] as u64; + + let bits_in_byte = byte & ((1 << num_bits) - 1); + out_bits |= bits_in_byte << (cur_idx - start_bit_idx); + + // Move to the next byte/offset + cur_idx += num_bits_in_byte; + num_bits -= num_bits_in_byte; + } + + // Update the read index + *bit_idx = cur_idx; + + out_bits + } + + fn read_u8(&self, bit_idx: &mut usize) -> u8 { + self.read_uint(bit_idx, 8) as u8 + } + + fn read_u5(&self, bit_idx: &mut usize) -> u8 { + self.read_uint(bit_idx, 5) as u8 + } + + fn read_u4(&self, bit_idx: &mut usize) -> u8 { + self.read_uint(bit_idx, 4) as u8 + } + + fn read_u3(&self, bit_idx: &mut usize) -> u8 { + self.read_uint(bit_idx, 3) as u8 + } + + fn read_u2(&self, bit_idx: &mut usize) -> u8 { + self.read_uint(bit_idx, 2) as u8 + } + + fn read_u1(&self, bit_idx: &mut usize) -> u8 { + self.read_uint(bit_idx, 1) as u8 + } + + fn read_bool(&self, bit_idx: &mut usize) -> bool { + self.read_u1(bit_idx) != 0 + } + + fn read_op(&self, bit_idx: &mut usize) -> CtxOp { + unsafe { std::mem::transmute(self.read_u4(bit_idx)) } + } +} + +impl fmt::Debug for BitVector { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // We print the higher bytes first + for (idx, byte) in self.bytes.iter().enumerate().rev() { + write!(f, "{:08b}", byte)?; + + // Insert a separator between each byte + if idx > 0 { + write!(f, "|")?; + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod bitvector_tests { + use super::*; + + #[test] + fn write_3() { + let mut arr = BitVector::new(); + arr.push_uint(3, 2); + assert!(arr.read_uint(&mut 0, 2) == 3); + } + + #[test] + fn write_11() { + let mut arr = BitVector::new(); + arr.push_uint(1, 1); + arr.push_uint(1, 1); + assert!(arr.read_uint(&mut 0, 2) == 3); + } + + #[test] + fn write_11_overlap() { + let mut arr = BitVector::new(); + arr.push_uint(0, 7); + arr.push_uint(3, 2); + arr.push_uint(1, 1); + + //dbg!(arr.read_uint(7, 2)); + assert!(arr.read_uint(&mut 7, 2) == 3); + } + + #[test] + fn write_ff_0() { + let mut arr = BitVector::new(); + arr.push_uint(0xFF, 8); + assert!(arr.read_uint(&mut 0, 8) == 0xFF); + } + + #[test] + fn write_ff_3() { + // Write 0xFF at bit index 3 + let mut arr = BitVector::new(); + arr.push_uint(0, 3); + arr.push_uint(0xFF, 8); + assert!(arr.read_uint(&mut 3, 8) == 0xFF); + } + + #[test] + fn write_ff_sandwich() { + // Write 0xFF sandwiched between zeros + let mut 
arr = BitVector::new(); + arr.push_uint(0, 3); + arr.push_u8(0xFF); + arr.push_uint(0, 3); + assert!(arr.read_uint(&mut 3, 8) == 0xFF); + } + + #[test] + fn write_read_u32_max() { + let mut arr = BitVector::new(); + arr.push_uint(0xFF_FF_FF_FF, 32); + assert!(arr.read_uint(&mut 0, 32) == 0xFF_FF_FF_FF); + } + + #[test] + fn write_read_u32_max_64b() { + let mut arr = BitVector::new(); + arr.push_uint(0xFF_FF_FF_FF, 64); + assert!(arr.read_uint(&mut 0, 64) == 0xFF_FF_FF_FF); + } + + #[test] + fn write_read_u64_max() { + let mut arr = BitVector::new(); + arr.push_uint(u64::MAX, 64); + assert!(arr.read_uint(&mut 0, 64) == u64::MAX); + } + + #[test] + fn encode_default() { + let mut bits = BitVector::new(); + let ctx = Context::default(); + let start_idx = ctx.encode_into(&mut bits); + assert!(start_idx == 0); + assert!(bits.num_bits() > 0); + assert!(bits.num_bytes() > 0); + + // Make sure that the round trip matches the input + let ctx2 = Context::decode_from(&bits, 0); + assert!(ctx2 == ctx); + } + + #[test] + fn encode_default_2x() { + let mut bits = BitVector::new(); + + let ctx0 = Context::default(); + let idx0 = ctx0.encode_into(&mut bits); + + let mut ctx1 = Context::default(); + ctx1.reg_mapping = RegMapping([Some(RegOpnd::Stack(0)), None, None, None, None]); + let idx1 = ctx1.encode_into(&mut bits); + + // Make sure that we can encode two contexts successively + let ctx0_dec = Context::decode_from(&bits, idx0); + let ctx1_dec = Context::decode_from(&bits, idx1); + assert!(ctx0_dec == ctx0); + assert!(ctx1_dec == ctx1); + } + + #[test] + fn regress_reg_mapping() { + let mut bits = BitVector::new(); + let mut ctx = Context::default(); + ctx.reg_mapping = RegMapping([Some(RegOpnd::Stack(0)), None, None, None, None]); + ctx.encode_into(&mut bits); + + let b0 = bits.read_u1(&mut 0); + assert!(b0 == 1); + + // Make sure that the round trip matches the input + let ctx2 = Context::decode_from(&bits, 0); + assert!(ctx2 == ctx); + } +} + +// Context encoding opcodes (4 bits) +#[derive(Debug, Copy, Clone)] +#[repr(u8)] +enum CtxOp { + // Self type (4 bits) + SetSelfType = 0, + + // Local idx (3 bits), temp type (4 bits) + SetLocalType, + + // Map stack temp to self with known type + // Temp idx (3 bits), known type (4 bits) + SetTempType, + + // Map stack temp to a local variable + // Temp idx (3 bits), local idx (3 bits) + MapTempLocal, + + // Map a stack temp to self + // Temp idx (3 bits) + MapTempSelf, + + // Set inline block pointer (8 bytes) + SetInlineBlock, + + // End of encoding + EndOfCode, +} + +// Number of entries in the context cache +const CTX_ENCODE_CACHE_SIZE: usize = 1024; +const CTX_DECODE_CACHE_SIZE: usize = 1024; + +// Cache of the last contexts encoded/decoded +// Empirically this saves a few percent of memory and speeds up compilation +// We can experiment with varying the size of this cache +pub type CtxEncodeCache = [(Context, u32); CTX_ENCODE_CACHE_SIZE]; +static mut CTX_ENCODE_CACHE: Option<Box<CtxEncodeCache>> = None; + +// Cache of the last contexts encoded/decoded +// This speeds up compilation +pub type CtxDecodeCache = [(Context, u32); CTX_DECODE_CACHE_SIZE]; +static mut CTX_DECODE_CACHE: Option<Box<CtxDecodeCache>> = None; + +// Size of the context cache in bytes +pub const CTX_ENCODE_CACHE_BYTES: usize = std::mem::size_of::<CtxEncodeCache>(); +pub const CTX_DECODE_CACHE_BYTES: usize = std::mem::size_of::<CtxDecodeCache>(); + +impl Context { + // Encode a context into the global context data, or return + // a cached previously encoded offset if one is found + pub 
fn encode(&self) -> u32 { + incr_counter!(num_contexts_encoded); + + if *self == Context::default() { + incr_counter!(context_cache_hits); + return 0; + } + + if let Some(idx) = Self::encode_cache_get(self) { + incr_counter!(context_cache_hits); + debug_assert!(Self::decode(idx) == *self); + return idx; + } + + let context_data = CodegenGlobals::get_context_data(); + + // Make sure we don't use offset 0 because + // it's is reserved for the default context + if context_data.num_bits() == 0 { + context_data.push_u1(0); + } + + let idx = self.encode_into(context_data); + let idx: u32 = idx.try_into().unwrap(); + + // Save this offset into the cache + Self::encode_cache_set(self, idx); + Self::decode_cache_set(self, idx); + + // In debug mode, check that the round-trip decoding always matches + debug_assert!(Self::decode(idx) == *self); + + idx + } + + pub fn decode(start_idx: u32) -> Context { + if start_idx == 0 { + return Context::default(); + }; + + if let Some(ctx) = Self::decode_cache_get(start_idx) { + return ctx; + } + + let context_data = CodegenGlobals::get_context_data(); + let ctx = Self::decode_from(context_data, start_idx as usize); + + Self::encode_cache_set(&ctx, start_idx); + Self::decode_cache_set(&ctx, start_idx); + + ctx + } + + // Store an entry in a cache of recently encoded/decoded contexts for encoding + fn encode_cache_set(ctx: &Context, idx: u32) + { + // Compute the hash for this context + let mut hasher = DefaultHasher::new(); + ctx.hash(&mut hasher); + let ctx_hash = hasher.finish() as usize; + + unsafe { + // Lazily initialize the context cache + if CTX_ENCODE_CACHE == None { + // Here we use the vec syntax to avoid allocating the large table on the stack, + // as this can cause a stack overflow + let tbl = vec![(Context::default(), 0); CTX_ENCODE_CACHE_SIZE].into_boxed_slice().try_into().unwrap(); + CTX_ENCODE_CACHE = Some(tbl); + } + + // Write a cache entry for this context + let cache = CTX_ENCODE_CACHE.as_mut().unwrap(); + cache[ctx_hash % CTX_ENCODE_CACHE_SIZE] = (*ctx, idx); + } + } + + // Store an entry in a cache of recently encoded/decoded contexts for decoding + fn decode_cache_set(ctx: &Context, idx: u32) { + unsafe { + // Lazily initialize the context cache + if CTX_DECODE_CACHE == None { + // Here we use the vec syntax to avoid allocating the large table on the stack, + // as this can cause a stack overflow + let tbl = vec![(Context::default(), 0); CTX_DECODE_CACHE_SIZE].into_boxed_slice().try_into().unwrap(); + CTX_DECODE_CACHE = Some(tbl); + } + + // Write a cache entry for this context + let cache = CTX_DECODE_CACHE.as_mut().unwrap(); + cache[idx as usize % CTX_DECODE_CACHE_SIZE] = (*ctx, idx); + } + } + + // Lookup the context in a cache of recently encoded/decoded contexts for encoding + fn encode_cache_get(ctx: &Context) -> Option<u32> + { + // Compute the hash for this context + let mut hasher = DefaultHasher::new(); + ctx.hash(&mut hasher); + let ctx_hash = hasher.finish() as usize; + + unsafe { + if CTX_ENCODE_CACHE == None { + return None; + } + + let cache = CTX_ENCODE_CACHE.as_mut().unwrap(); + + // Check that the context for this cache entry matches + let cache_entry = &cache[ctx_hash % CTX_ENCODE_CACHE_SIZE]; + if cache_entry.0 == *ctx { + debug_assert!(cache_entry.1 != 0); + return Some(cache_entry.1); + } + + return None; + } + } + + // Lookup the context in a cache of recently encoded/decoded contexts for decoding + fn decode_cache_get(start_idx: u32) -> Option<Context> { + unsafe { + if CTX_DECODE_CACHE == None { + return None; + } 
+ + let cache = CTX_DECODE_CACHE.as_mut().unwrap(); + + // Check that the start_idx for this cache entry matches + let cache_entry = &cache[start_idx as usize % CTX_DECODE_CACHE_SIZE]; + if cache_entry.1 == start_idx { + return Some(cache_entry.0); + } + + return None; + } + } + + // Encode into a compressed context representation in a bit vector + fn encode_into(&self, bits: &mut BitVector) -> usize { + let start_idx = bits.num_bits(); + + // Most of the time, the stack size is small and sp offset has the same value + if (self.stack_size as i64) == (self.sp_offset as i64) && self.stack_size < 4 { + // One single bit to signify a compact stack_size/sp_offset encoding + debug_assert!(self.sp_offset >= 0); + bits.push_u1(1); + bits.push_u2(self.stack_size); + } else { + // Full stack size encoding + bits.push_u1(0); + + // Number of values currently on the temporary stack + bits.push_u8(self.stack_size); + + // sp_offset: i8, + bits.push_u8(self.sp_offset as u8); + } + + // Which stack temps or locals are in a register + for &temp in self.reg_mapping.0.iter() { + if let Some(temp) = temp { + bits.push_u1(1); // Some + match temp { + RegOpnd::Stack(stack_idx) => { + bits.push_u1(0); // Stack + bits.push_u3(stack_idx); + } + RegOpnd::Local(local_idx) => { + bits.push_u1(1); // Local + bits.push_u3(local_idx); + } + } + } else { + bits.push_u1(0); // None + } + } + + bits.push_bool(self.is_deferred); + bits.push_bool(self.is_return_landing); + + // The chain depth is most often 0 or 1 + if self.chain_depth < 2 { + bits.push_u1(0); + bits.push_u1(self.chain_depth); + + } else { + bits.push_u1(1); + bits.push_u5(self.chain_depth); + } + + // Encode the self type if known + if self.self_type != Type::Unknown { + bits.push_op(CtxOp::SetSelfType); + bits.push_u4(self.self_type as u8); + } + + // Encode the local types if known + for local_idx in 0..MAX_CTX_LOCALS { + let t = self.get_local_type(local_idx); + if t != Type::Unknown { + bits.push_op(CtxOp::SetLocalType); + bits.push_u3(local_idx as u8); + bits.push_u4(t as u8); + } + } + + // Encode stack temps + for stack_idx in 0..MAX_CTX_TEMPS { + let mapping = self.get_temp_mapping(stack_idx); + + match mapping { + MapToStack(temp_type) => { + if temp_type != Type::Unknown { + // Temp idx (3 bits), known type (4 bits) + bits.push_op(CtxOp::SetTempType); + bits.push_u3(stack_idx as u8); + bits.push_u4(temp_type as u8); + } + } + + MapToLocal(local_idx) => { + bits.push_op(CtxOp::MapTempLocal); + bits.push_u3(stack_idx as u8); + bits.push_u3(local_idx); + } + + MapToSelf => { + // Temp idx (3 bits) + bits.push_op(CtxOp::MapTempSelf); + bits.push_u3(stack_idx as u8); + } + } + } + + // Inline block pointer + if let Some(iseq) = self.inline_block { + bits.push_op(CtxOp::SetInlineBlock); + bits.push_uint(iseq as u64, 64); + } + + // TODO: should we add an op for end-of-encoding, + // or store num ops at the beginning? 
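        // Editorial worked example (not in the original source): a context with
        // stack_size == 2, sp_offset == 2, no registers mapped, both flags
        // false, chain_depth == 0, and no known self/local/temp types takes the
        // compact stack encoding above, so it costs 1 + 2 bits for
        // stack_size/sp_offset, 5 bits for the five empty register slots,
        // 2 flag bits, 2 bits of chain depth, and the 4-bit EndOfCode opcode
        // pushed below, for 16 bits in total.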
+ bits.push_op(CtxOp::EndOfCode); + + start_idx + } + + // Decode a compressed context representation from a bit vector + fn decode_from(bits: &BitVector, start_idx: usize) -> Context { + let mut ctx = Context::default(); + + let mut idx = start_idx; + + // Small vs large stack size encoding + if bits.read_u1(&mut idx) == 1 { + ctx.stack_size = bits.read_u2(&mut idx); + ctx.sp_offset = ctx.stack_size as i8; + } else { + ctx.stack_size = bits.read_u8(&mut idx); + let sp_offset_bits = bits.read_u8(&mut idx); + ctx.sp_offset = sp_offset_bits as i8; + + // If the top bit is set, then the sp offset must be negative + debug_assert!(!( (sp_offset_bits & 0x80) != 0 && ctx.sp_offset > 0 )); + } + + // Which stack temps or locals are in a register + for index in 0..MAX_MAPPED_REGS { + if bits.read_u1(&mut idx) == 1 { // Some + let temp = if bits.read_u1(&mut idx) == 0 { // RegMapping::Stack + RegOpnd::Stack(bits.read_u3(&mut idx)) + } else { + RegOpnd::Local(bits.read_u3(&mut idx)) + }; + ctx.reg_mapping.0[index] = Some(temp); + } + } + + ctx.is_deferred = bits.read_bool(&mut idx); + ctx.is_return_landing = bits.read_bool(&mut idx); + + if bits.read_u1(&mut idx) == 0 { + ctx.chain_depth = bits.read_u1(&mut idx) + } else { + ctx.chain_depth = bits.read_u5(&mut idx) + } + + loop { + //println!("reading op"); + let op = bits.read_op(&mut idx); + //println!("got op {:?}", op); + + match op { + CtxOp::SetSelfType => { + ctx.self_type = unsafe { transmute(bits.read_u4(&mut idx)) }; + } + + CtxOp::SetLocalType => { + let local_idx = bits.read_u3(&mut idx) as usize; + let t = unsafe { transmute(bits.read_u4(&mut idx)) }; + ctx.set_local_type(local_idx, t); + } + + // Map temp to stack (known type) + CtxOp::SetTempType => { + let temp_idx = bits.read_u3(&mut idx) as usize; + let temp_type = unsafe { transmute(bits.read_u4(&mut idx)) }; + ctx.set_temp_mapping(temp_idx, TempMapping::MapToStack(temp_type)); + } + + // Map temp to local + CtxOp::MapTempLocal => { + let temp_idx = bits.read_u3(&mut idx) as usize; + let local_idx = bits.read_u3(&mut idx); + ctx.set_temp_mapping(temp_idx, TempMapping::MapToLocal(local_idx)); + } + + // Map temp to self + CtxOp::MapTempSelf => { + let temp_idx = bits.read_u3(&mut idx) as usize; + ctx.set_temp_mapping(temp_idx, TempMapping::MapToSelf); + } + + // Inline block pointer + CtxOp::SetInlineBlock => { + ctx.inline_block = Some(bits.read_uint(&mut idx, 64) as IseqPtr); + } + + CtxOp::EndOfCode => break, + } + } + + ctx + } } /// Tuple of (iseq, idx) used to identify basic blocks @@ -322,7 +1221,7 @@ pub struct BlockId { pub iseq: IseqPtr, /// Index in the iseq where the block starts - pub idx: u32, + pub idx: u16, } /// Branch code shape enumeration @@ -333,12 +1232,127 @@ pub enum BranchShape { Default, // Neither target is next } -// Branch code generation function signature -type BranchGenFn = - fn(cb: &mut Assembler, target0: CodePtr, target1: Option<CodePtr>, shape: BranchShape) -> (); +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum BranchGenFn { + BranchIf(Cell<BranchShape>), + BranchNil(Cell<BranchShape>), + BranchUnless(Cell<BranchShape>), + JumpToTarget0(Cell<BranchShape>), + JNZToTarget0, + JZToTarget0, + JBEToTarget0, + JBToTarget0, + JOMulToTarget0, + JITReturn, +} + +impl BranchGenFn { + pub fn call(&self, asm: &mut Assembler, target0: Target, target1: Option<Target>) { + match self { + BranchGenFn::BranchIf(shape) => { + match shape.get() { + BranchShape::Next0 => asm.jz(target1.unwrap()), + BranchShape::Next1 => asm.jnz(target0), + BranchShape::Default => { 
+ asm.jnz(target0); + asm.jmp(target1.unwrap()); + } + } + } + BranchGenFn::BranchNil(shape) => { + match shape.get() { + BranchShape::Next0 => asm.jne(target1.unwrap()), + BranchShape::Next1 => asm.je(target0), + BranchShape::Default => { + asm.je(target0); + asm.jmp(target1.unwrap()); + } + } + } + BranchGenFn::BranchUnless(shape) => { + match shape.get() { + BranchShape::Next0 => asm.jnz(target1.unwrap()), + BranchShape::Next1 => asm.jz(target0), + BranchShape::Default => { + asm.jz(target0); + asm.jmp(target1.unwrap()); + } + } + } + BranchGenFn::JumpToTarget0(shape) => { + if shape.get() == BranchShape::Next1 { + panic!("Branch shape Next1 not allowed in JumpToTarget0!"); + } + if shape.get() == BranchShape::Default { + asm.jmp(target0); + } + } + BranchGenFn::JNZToTarget0 => { + asm.jnz(target0) + } + BranchGenFn::JZToTarget0 => { + asm.jz(target0) + } + BranchGenFn::JBEToTarget0 => { + asm.jbe(target0) + } + BranchGenFn::JBToTarget0 => { + asm.jb(target0) + } + BranchGenFn::JOMulToTarget0 => { + asm.jo_mul(target0) + } + BranchGenFn::JITReturn => { + asm_comment!(asm, "update cfp->jit_return"); + let jit_return = RUBY_OFFSET_CFP_JIT_RETURN - RUBY_SIZEOF_CONTROL_FRAME as i32; + let raw_ptr = asm.lea_jump_target(target0); + asm.mov(Opnd::mem(64, CFP, jit_return), raw_ptr); + } + } + } + + pub fn get_shape(&self) -> BranchShape { + match self { + BranchGenFn::BranchIf(shape) | + BranchGenFn::BranchNil(shape) | + BranchGenFn::BranchUnless(shape) | + BranchGenFn::JumpToTarget0(shape) => shape.get(), + BranchGenFn::JNZToTarget0 | + BranchGenFn::JZToTarget0 | + BranchGenFn::JBEToTarget0 | + BranchGenFn::JBToTarget0 | + BranchGenFn::JOMulToTarget0 | + BranchGenFn::JITReturn => BranchShape::Default, + } + } + + pub fn set_shape(&self, new_shape: BranchShape) { + match self { + BranchGenFn::BranchIf(shape) | + BranchGenFn::BranchNil(shape) | + BranchGenFn::BranchUnless(shape) => { + shape.set(new_shape); + } + BranchGenFn::JumpToTarget0(shape) => { + if new_shape == BranchShape::Next1 { + panic!("Branch shape Next1 not allowed in JumpToTarget0!"); + } + shape.set(new_shape); + } + BranchGenFn::JNZToTarget0 | + BranchGenFn::JZToTarget0 | + BranchGenFn::JBEToTarget0 | + BranchGenFn::JBToTarget0 | + BranchGenFn::JOMulToTarget0 | + BranchGenFn::JITReturn => { + assert_eq!(new_shape, BranchShape::Default); + } + } + } +} /// A place that a branch could jump to -#[derive(Debug)] +#[derive(Debug, Clone)] enum BranchTarget { Stub(Box<BranchStub>), // Not compiled yet Block(BlockRef), // Already compiled @@ -348,88 +1362,255 @@ impl BranchTarget { fn get_address(&self) -> Option<CodePtr> { match self { BranchTarget::Stub(stub) => stub.address, - BranchTarget::Block(blockref) => blockref.borrow().start_addr, + BranchTarget::Block(blockref) => Some(unsafe { blockref.as_ref() }.start_addr), } } fn get_blockid(&self) -> BlockId { match self { - BranchTarget::Stub(stub) => stub.id, - BranchTarget::Block(blockref) => blockref.borrow().blockid, + BranchTarget::Stub(stub) => BlockId { iseq: stub.iseq.get(), idx: stub.iseq_idx }, + BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.get_blockid(), } } - fn get_ctx(&self) -> Context { + fn get_ctx(&self) -> u32 { match self { - BranchTarget::Stub(stub) => stub.ctx.clone(), - BranchTarget::Block(blockref) => blockref.borrow().ctx.clone(), + BranchTarget::Stub(stub) => stub.ctx, + BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.ctx, } } fn get_block(&self) -> Option<BlockRef> { match self { BranchTarget::Stub(_) => None, - 
BranchTarget::Block(blockref) => Some(blockref.clone()), + BranchTarget::Block(blockref) => Some(*blockref), } } - fn set_iseq(&mut self, iseq: IseqPtr) { + fn set_iseq(&self, iseq: IseqPtr) { match self { - BranchTarget::Stub(stub) => stub.id.iseq = iseq, - BranchTarget::Block(blockref) => blockref.borrow_mut().blockid.iseq = iseq, + BranchTarget::Stub(stub) => stub.iseq.set(iseq), + BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.iseq.set(iseq), } } } -#[derive(Debug)] +#[derive(Debug, Clone)] struct BranchStub { address: Option<CodePtr>, - id: BlockId, - ctx: Context, + iseq: Cell<IseqPtr>, + iseq_idx: IseqIdx, + ctx: u32, } /// Store info about an outgoing branch in a code segment /// Note: care must be taken to minimize the size of branch objects -struct Branch { +pub struct Branch { // Block this is attached to - block: BlockRef, + block: Cell<BlockRef>, // Positions where the generated code starts and ends - start_addr: Option<CodePtr>, - end_addr: Option<CodePtr>, // exclusive + start_addr: CodePtr, + end_addr: Cell<CodePtr>, // exclusive // Branch target blocks and their contexts - targets: [Option<Box<BranchTarget>>; 2], + targets: [Cell<Option<Box<BranchTarget>>>; 2], // Branch code generation function gen_fn: BranchGenFn, +} + +/// A [Branch] for a [Block] that is under construction. +/// Fields correspond, but may be `None` during construction. +pub struct PendingBranch { + /// Allocation holder for the address of the constructed branch + /// in error paths Box deallocates it. + uninit_branch: Box<MaybeUninit<Branch>>, + + /// Branch code generation function + gen_fn: BranchGenFn, + + /// Positions where the generated code starts and ends + start_addr: Cell<Option<CodePtr>>, + end_addr: Cell<Option<CodePtr>>, // exclusive + + /// Branch target blocks and their contexts + targets: [Cell<Option<Box<BranchTarget>>>; 2], +} + +impl Branch { + // Compute the size of the branch code + fn code_size(&self) -> usize { + (self.end_addr.get().as_offset() - self.start_addr.as_offset()) as usize + } + + /// Get the address of one of the branch destination + fn get_target_address(&self, target_idx: usize) -> Option<CodePtr> { + unsafe { + self.targets[target_idx] + .ref_unchecked() + .as_ref() + .and_then(|target| target.get_address()) + } + } + + fn get_stub_count(&self) -> usize { + let mut count = 0; + for target in self.targets.iter() { + if unsafe { + // SAFETY: no mutation + matches!( + target.ref_unchecked().as_ref().map(Box::as_ref), + Some(BranchTarget::Stub(_)) + ) + } { + count += 1; + } + } + count + } - // Shape of the branch - shape: BranchShape, + fn assert_layout(&self) { + let shape = self.gen_fn.get_shape(); + assert!( + !(shape == BranchShape::Default && 0 == self.code_size()), + "zero-size branches are incorrect when code for neither targets are adjacent" + // One needs to issue some instruction to steer to the branch target + // when falling through isn't an option. + ); + } } impl std::fmt::Debug for Branch { + // Can't derive this because `targets: !Copy` due to Cell. fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // TODO: expand this if needed. #[derive(Debug)] on Branch gave a - // strange error related to BranchGenFn + let targets = unsafe { + // SAFETY: + // While the references are live for the result of this function, + // no mutation happens because we are only calling derived fmt::Debug functions. 
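// ---- Editor's sketch (not part of the diff) ----
// The intent behind BranchShape, as used by BranchGenFn::call() and checked by
// assert_layout() above: when one of the two targets is generated immediately after the
// branch, the branch falls through to it and only the other target needs an explicit jump.
// Shape and emit() are hypothetical stand-ins for the Assembler jz/jnz/jmp calls.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum Shape {
    Next0,   // target0 is the next block in memory: fall through to it
    Next1,   // target1 is the next block in memory: fall through to it
    Default, // neither is adjacent: both sides need an explicit jump
}

// Pseudo-instructions a two-way "branch if nonzero" has to emit for each shape.
fn emit(shape: Shape) -> Vec<&'static str> {
    match shape {
        // Invert the condition and jump to target1; target0 is reached by falling through.
        Shape::Next0 => vec!["jz target1"],
        // Keep the condition and jump to target0; target1 is reached by falling through.
        Shape::Next1 => vec!["jnz target0"],
        // No fallthrough available: a conditional jump plus an unconditional one.
        Shape::Default => vec!["jnz target0", "jmp target1"],
    }
}
// ---- end of editor's sketch ----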
+ [self.targets[0].as_ptr().as_ref().unwrap(), self.targets[1].as_ptr().as_ref().unwrap()] + }; + formatter .debug_struct("Branch") + .field("block", &self.block) .field("start", &self.start_addr) .field("end", &self.end_addr) - .field("targets", &self.targets) + .field("targets", &targets) + .field("gen_fn", &self.gen_fn) .finish() } } -impl Branch { - // Compute the size of the branch code - fn code_size(&self) -> usize { - (self.end_addr.unwrap().raw_ptr() as usize) - (self.start_addr.unwrap().raw_ptr() as usize) +impl PendingBranch { + /// Set up a branch target at `target_idx`. Find an existing block to branch to + /// or generate a stub for one. + #[must_use] + fn set_target( + &self, + target_idx: u32, + target: BlockId, + ctx: &Context, + jit: &mut JITState, + ) -> Option<CodePtr> { + // If the block already exists + if let Some(blockref) = find_block_version(target, ctx) { + let block = unsafe { blockref.as_ref() }; + + // Fill out the target with this block + self.targets[target_idx.as_usize()] + .set(Some(Box::new(BranchTarget::Block(blockref)))); + return Some(block.start_addr); + } + + // Compress/encode the context + let ctx = Context::encode(ctx); + + // The branch struct is uninitialized right now but as a stable address. + // We make sure the stub runs after the branch is initialized. + let branch_struct_addr = self.uninit_branch.as_ptr() as usize; + let stub_addr = gen_branch_stub(ctx, jit.iseq, jit.get_ocb(), branch_struct_addr, target_idx); + + if let Some(stub_addr) = stub_addr { + // Fill the branch target with a stub + self.targets[target_idx.as_usize()].set(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub { + address: Some(stub_addr), + iseq: Cell::new(target.iseq), + iseq_idx: target.idx, + ctx, + }))))); + } + + stub_addr } - /// Get the address of one of the branch destination - fn get_target_address(&self, target_idx: usize) -> Option<CodePtr> { - self.targets[target_idx].as_ref().and_then(|target| target.get_address()) + // Construct the branch and wire it up in the grpah + fn into_branch(mut self, uninit_block: BlockRef) -> BranchRef { + // Make the branch + let branch = Branch { + block: Cell::new(uninit_block), + start_addr: self.start_addr.get().unwrap(), + end_addr: Cell::new(self.end_addr.get().unwrap()), + targets: self.targets, + gen_fn: self.gen_fn, + }; + // Move it to the designated place on + // the heap and unwrap MaybeUninit. + self.uninit_branch.write(branch); + let raw_branch: *mut MaybeUninit<Branch> = Box::into_raw(self.uninit_branch); + let branchref = NonNull::new(raw_branch as *mut Branch).expect("no null from Box"); + + // SAFETY: just allocated it + let branch = unsafe { branchref.as_ref() }; + // For block branch targets, put the new branch in the + // appropriate incoming list. + for target in branch.targets.iter() { + // SAFETY: no mutation + let out_block: Option<BlockRef> = unsafe { + target.ref_unchecked().as_ref().and_then(|target| target.get_block()) + }; + + if let Some(out_block) = out_block { + // SAFETY: These blockrefs come from set_target() which only puts blocks from + // ISeqs, which are all initialized. Note that uninit_block isn't in any ISeq + // payload yet. 
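// ---- Editor's sketch (not part of the diff) ----
// The allocate-first, initialize-later pattern that PendingBranch/into_branch() uses above.
// The heap slot's address is stable before the value is written, so it can be handed out
// (e.g. baked into a branch stub) ahead of time. Payload and make_payload are hypothetical.
use std::mem::MaybeUninit;
use std::ptr::NonNull;

struct Payload {
    value: u64,
}

fn make_payload(value: u64) -> NonNull<Payload> {
    // Reserve heap space without constructing a Payload yet. The slot's address is already
    // final even though its contents are still uninitialized.
    let mut uninit: Box<MaybeUninit<Payload>> = Box::new(MaybeUninit::uninit());
    let stable_addr = uninit.as_ptr() as usize;

    // ...the address could now be published before the value exists...

    // Fill the slot, then give up ownership of the Box. The allocation stays live until
    // someone reconstructs it with Box::from_raw and drops it, as rb_yjit_iseq_free does
    // for Entry objects further down.
    uninit.write(Payload { value });
    let raw: *mut MaybeUninit<Payload> = Box::into_raw(uninit);
    let ptr = NonNull::new(raw as *mut Payload).expect("Box never returns null");

    assert_eq!(ptr.as_ptr() as usize, stable_addr);
    ptr
}
// ---- end of editor's sketch ----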
+ unsafe { out_block.as_ref() }.incoming.push(branchref); + } + } + + branch.assert_layout(); + incr_counter!(compiled_branch_count); + + branchref + } +} + +// Store info about code used on YJIT entry +pub struct Entry { + // Positions where the generated code starts and ends + start_addr: CodePtr, + end_addr: CodePtr, // exclusive +} + +/// A [Branch] for a [Block] that is under construction. +pub struct PendingEntry { + pub uninit_entry: Box<MaybeUninit<Entry>>, + start_addr: Cell<Option<CodePtr>>, + end_addr: Cell<Option<CodePtr>>, // exclusive +} + +impl PendingEntry { + // Construct the entry in the heap + pub fn into_entry(mut self) -> EntryRef { + // Make the entry + let entry = Entry { + start_addr: self.start_addr.get().unwrap(), + end_addr: self.end_addr.get().unwrap(), + }; + // Move it to the designated place on the heap and unwrap MaybeUninit. + self.uninit_entry.write(entry); + let raw_entry: *mut MaybeUninit<Entry> = Box::into_raw(self.uninit_entry); + NonNull::new(raw_entry as *mut Entry).expect("no null from Box") } } @@ -441,50 +1622,69 @@ pub type CmePtr = *const rb_callable_method_entry_t; /// Note: care must be taken to minimize the size of block_t objects #[derive(Debug)] pub struct Block { - // Bytecode sequence (iseq, idx) this is a version of - blockid: BlockId, + // The byte code instruction sequence this is a version of. + // Can change due to moving GC. + iseq: Cell<IseqPtr>, - // Index one past the last instruction for this block in the iseq - end_idx: u32, + // Index range covered by this version in `ISEQ_BODY(iseq)->iseq_encoded`. + iseq_range: Range<IseqIdx>, // Context at the start of the block // This should never be mutated - ctx: Context, + ctx: u32, // Positions where the generated code starts and ends - start_addr: Option<CodePtr>, - end_addr: Option<CodePtr>, + start_addr: CodePtr, + end_addr: Cell<CodePtr>, // List of incoming branches (from predecessors) - // These are reference counted (ownership shared between predecessor and successors) - incoming: Vec<BranchRef>, + incoming: MutableBranchList, - // NOTE: we might actually be able to store the branches here without refcounting - // however, using a RefCell makes it easy to get a pointer to Branch objects - // // List of outgoing branches (to successors) - outgoing: Vec<BranchRef>, + // Infrequently mutated for control flow graph edits for saving memory. + outgoing: MutableBranchList, // FIXME: should these be code pointers instead? // Offsets for GC managed objects in the mainline code block - gc_obj_offsets: Vec<u32>, + gc_obj_offsets: Box<[u32]>, // CME dependencies of this block, to help to remove all pointers to this // block in the system. - cme_dependencies: Vec<CmePtr>, + cme_dependencies: Box<[Cell<CmePtr>]>, // Code address of an exit for `ctx` and `blockid`. // Used for block invalidation. - pub entry_exit: Option<CodePtr>, + entry_exit: Option<CodePtr>, } -/// Reference-counted pointer to a block that can be borrowed mutably. -/// Wrapped so we could implement [Hash] and [Eq] for use with stdlib collections. -#[derive(Debug)] -pub struct BlockRef(Rc<RefCell<Block>>); - -/// Reference-counted pointer to a branch that can be borrowed mutably -type BranchRef = Rc<RefCell<Branch>>; +/// Pointer to a [Block]. +/// +/// # Safety +/// +/// _Never_ derive a `&mut Block` from this and always use +/// [std::ptr::NonNull::as_ref] to get a `&Block`. `&'a mut` +/// in Rust asserts that there are no other references live +/// over the lifetime `'a`. 
This uniqueness assertion does +/// not hold in many situations for us, even when you ignore +/// the fact that our control flow graph can have cycles. +/// Here are just two examples where we have overlapping references: +/// - Yielding to a different OS thread within the same +/// ractor during compilation +/// - The GC calling [rb_yjit_iseq_mark] during compilation +/// +/// Technically, for soundness, we also need to ensure that +/// the we have the VM lock while the result of `as_ref()` +/// is live, so that no deallocation happens while the +/// shared reference is live. The vast majority of our code run while +/// holding the VM lock, though. +pub type BlockRef = NonNull<Block>; + +/// Pointer to a [Branch]. See [BlockRef] for notes about +/// proper usage. +pub type BranchRef = NonNull<Branch>; + +/// Pointer to an entry that is already added to an ISEQ +pub type EntryRef = NonNull<Entry>; /// List of block versions for a given blockid type VersionList = Vec<BlockRef>; @@ -493,48 +1693,53 @@ type VersionList = Vec<BlockRef>; /// An instance of this is stored on each iseq type VersionMap = Vec<VersionList>; -impl BlockRef { - /// Constructor - pub fn new(rc: Rc<RefCell<Block>>) -> Self { - Self(rc) +/// [Interior mutability][1] wrapper for a list of branches. +/// O(n) insertion, but space efficient. We generally expect +/// blocks to have only a few branches. +/// +/// [1]: https://doc.rust-lang.org/std/cell/struct.UnsafeCell.html +#[repr(transparent)] +struct MutableBranchList(Cell<Box<[BranchRef]>>); + +impl MutableBranchList { + fn push(&self, branch: BranchRef) { + // Temporary move the boxed slice out of self. + // oom=abort is load bearing here... + let mut current_list = self.0.take().into_vec(); + current_list.push(branch); + self.0.set(current_list.into_boxed_slice()); } - /// Borrow the block through [RefCell]. - pub fn borrow(&self) -> Ref<'_, Block> { - self.0.borrow() - } - - /// Borrow the block for mutation through [RefCell]. - pub fn borrow_mut(&self) -> RefMut<'_, Block> { - self.0.borrow_mut() + /// Iterate through branches in the list by moving out of the cell + /// and then putting it back when done. Modifications to this cell + /// during iteration will be discarded. + /// + /// Assumes panic=abort since panic=unwind during iteration would + /// leave the cell empty. + fn for_each(&self, mut f: impl FnMut(BranchRef)) { + let list = self.0.take(); + for branch in list.iter() { + f(*branch); + } + self.0.set(list); } -} -impl Clone for BlockRef { - /// Clone the [Rc] - fn clone(&self) -> Self { - Self(self.0.clone()) + /// Length of the list. + fn len(&self) -> usize { + // SAFETY: No cell mutation inside unsafe. 
+ unsafe { self.0.ref_unchecked().len() } } } -impl Hash for BlockRef { - /// Hash the reference by hashing the pointer - fn hash<H: Hasher>(&self, state: &mut H) { - let rc_ptr = Rc::as_ptr(&self.0); - rc_ptr.hash(state); - } -} +impl fmt::Debug for MutableBranchList { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + // SAFETY: the derived Clone for boxed slices does not mutate this Cell + let branches = unsafe { self.0.ref_unchecked().clone() }; -impl PartialEq for BlockRef { - /// Equality defined by allocation identity - fn eq(&self, other: &Self) -> bool { - Rc::ptr_eq(&self.0, &other.0) + formatter.debug_list().entries(branches.iter()).finish() } } -/// It's comparison by identity so all the requirements are statisfied -impl Eq for BlockRef {} - /// This is all the data YJIT stores on an iseq /// This will be dynamically allocated by C code /// C code should pass an &mut IseqPayload to us @@ -542,11 +1747,14 @@ impl Eq for BlockRef {} #[derive(Default)] pub struct IseqPayload { // Basic block versions - version_map: VersionMap, + pub version_map: VersionMap, - // Indexes of code pages used by this this ISEQ + // Indexes of code pages used by this ISEQ pub pages: HashSet<usize>, + // List of ISEQ entry codes + pub entries: Vec<EntryRef>, + // Blocks that are invalidated but are not yet deallocated. // The code GC will free them later. pub dead_blocks: Vec<BlockRef>, @@ -605,17 +1813,19 @@ pub fn get_or_create_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload { /// Iterate over all existing ISEQs pub fn for_each_iseq<F: FnMut(IseqPtr)>(mut callback: F) { unsafe extern "C" fn callback_wrapper(iseq: IseqPtr, data: *mut c_void) { - let callback: &mut &mut dyn FnMut(IseqPtr) -> bool = std::mem::transmute(&mut *data); + // SAFETY: points to the local below + let callback: &mut &mut dyn FnMut(IseqPtr) -> bool = unsafe { std::mem::transmute(&mut *data) }; callback(iseq); } let mut data: &mut dyn FnMut(IseqPtr) = &mut callback; - unsafe { rb_yjit_for_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) }; + unsafe { rb_jit_for_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) }; } /// Iterate over all on-stack ISEQs pub fn for_each_on_stack_iseq<F: FnMut(IseqPtr)>(mut callback: F) { unsafe extern "C" fn callback_wrapper(iseq: IseqPtr, data: *mut c_void) { - let callback: &mut &mut dyn FnMut(IseqPtr) -> bool = std::mem::transmute(&mut *data); + // SAFETY: points to the local below + let callback: &mut &mut dyn FnMut(IseqPtr) -> bool = unsafe { std::mem::transmute(&mut *data) }; callback(iseq); } let mut data: &mut dyn FnMut(IseqPtr) = &mut callback; @@ -633,23 +1843,34 @@ pub fn for_each_on_stack_iseq_payload<F: FnMut(&IseqPayload)>(mut callback: F) { /// Iterate over all NOT on-stack ISEQ payloads pub fn for_each_off_stack_iseq_payload<F: FnMut(&mut IseqPayload)>(mut callback: F) { - let mut on_stack_iseqs: Vec<IseqPtr> = vec![]; - for_each_on_stack_iseq(|iseq| { - on_stack_iseqs.push(iseq); - }); - for_each_iseq(|iseq| { + // Get all ISEQs on the heap. Note that rb_objspace_each_objects() runs GC first, + // which could move ISEQ pointers when GC.auto_compact = true. + // So for_each_on_stack_iseq() must be called after this, which doesn't run GC. + let mut iseqs: Vec<IseqPtr> = vec![]; + for_each_iseq(|iseq| iseqs.push(iseq)); + + // Get all ISEQs that are on a CFP of existing ECs. 
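// ---- Editor's sketch (not part of the diff) ----
// The take-modify-put-back idiom behind MutableBranchList above. A Cell<Box<[T]>> gives
// interior mutability without RefCell's runtime borrow flag and without the spare capacity a
// Vec keeps around. CellList is a hypothetical stand-in; the ref_unchecked() helper used above
// is avoided here so the sketch stays in safe Rust.
use std::cell::Cell;

#[derive(Default)]
struct CellList(Cell<Box<[u32]>>);

impl CellList {
    // O(n) push: move the slice out, grow it, move it back.
    fn push(&self, item: u32) {
        let mut items = self.0.take().into_vec();
        items.push(item);
        self.0.set(items.into_boxed_slice());
    }

    // Iterate by temporarily taking ownership; pushes made through `self` during the
    // callback would be discarded when the list is put back, just as documented above.
    fn for_each(&self, mut f: impl FnMut(u32)) {
        let items = self.0.take();
        for &item in items.iter() {
            f(item);
        }
        self.0.set(items);
    }
}

fn cell_list_demo() -> u32 {
    let list = CellList::default();
    list.push(1);
    list.push(2);
    let mut sum = 0;
    list.for_each(|x| sum += x);
    sum // 3
}
// ---- end of editor's sketch ----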
+ let mut on_stack_iseqs: HashSet<IseqPtr> = HashSet::new(); + for_each_on_stack_iseq(|iseq| { on_stack_iseqs.insert(iseq); }); + + // Invoke the callback for iseqs - on_stack_iseqs + for iseq in iseqs { if !on_stack_iseqs.contains(&iseq) { if let Some(iseq_payload) = get_iseq_payload(iseq) { callback(iseq_payload); } } - }) + } } /// Free the per-iseq payload #[no_mangle] -pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) { +pub extern "C" fn rb_yjit_iseq_free(iseq: IseqPtr) { + // Free invariants for the ISEQ + iseq_free_invariants(iseq); + let payload = { + let payload = unsafe { rb_iseq_get_yjit_payload(iseq) }; if payload.is_null() { // Nothing to free. return; @@ -663,27 +1884,45 @@ pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) { // SAFETY: We got the pointer from Box::into_raw(). let payload = unsafe { Box::from_raw(payload) }; - // Increment the freed iseq count - incr_counter!(freed_iseq_count); - - // Free all blocks in the payload + // Free all blocks in version_map. The GC doesn't free running iseqs. for versions in &payload.version_map { for block in versions { - free_block(block); + // SAFETY: blocks in the version_map are always well connected + unsafe { free_block(*block, true) }; } } + + // Free dead blocks + for block in payload.dead_blocks { + unsafe { free_block(block, false) }; + } + + // Free all entries + for entryref in payload.entries.iter() { + let entry = unsafe { Box::from_raw(entryref.as_ptr()) }; + mem::drop(entry); + } + + // Increment the freed iseq count + incr_counter!(freed_iseq_count); } -/// GC callback for marking GC objects in the the per-iseq payload. +/// GC callback for marking GC objects in the per-iseq payload. #[no_mangle] pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) { let payload = if payload.is_null() { // Nothing to mark. return; } else { - // SAFETY: It looks like the GC takes the VM lock while marking - // so we should be satisfying aliasing rules here. - unsafe { &*(payload as *const IseqPayload) } + // SAFETY: The GC takes the VM lock while marking, which + // we assert, so we should be synchronized and data race free. + // + // For aliasing, having the VM lock hopefully also implies that no one + // else has an overlapping &mut IseqPayload. + unsafe { + rb_assert_holding_vm_lock(); + &*(payload as *const IseqPayload) + } }; // For marking VALUEs written into the inline code block. @@ -692,26 +1931,56 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) { for versions in &payload.version_map { for block in versions { - let block = block.borrow(); + // SAFETY: all blocks inside version_map are initialized. 
+ let block = unsafe { block.as_ref() }; + mark_block(block, cb, false); + } + } + // Mark dead blocks, since there could be stubs pointing at them + for blockref in &payload.dead_blocks { + // SAFETY: dead blocks come from version_map, which only have initialized blocks + let block = unsafe { blockref.as_ref() }; + mark_block(block, cb, true); + } - unsafe { rb_gc_mark_movable(block.blockid.iseq.into()) }; + return; - // Mark method entry dependencies - for &cme_dep in &block.cme_dependencies { - unsafe { rb_gc_mark_movable(cme_dep.into()) }; - } + fn mark_block(block: &Block, cb: &CodeBlock, dead: bool) { + unsafe { rb_gc_mark_movable(block.iseq.get().into()) }; + + // Mark method entry dependencies + for cme_dep in block.cme_dependencies.iter() { + unsafe { rb_gc_mark_movable(cme_dep.get().into()) }; + } - // Mark outgoing branch entries - for branch in &block.outgoing { - let branch = branch.borrow(); - for target in branch.targets.iter().flatten() { - unsafe { rb_gc_mark_movable(target.get_blockid().iseq.into()) }; + // Mark outgoing branch entries + block.outgoing.for_each(|branch| { + let branch = unsafe { branch.as_ref() }; + for target in branch.targets.iter() { + // SAFETY: no mutation inside unsafe + let target_iseq = unsafe { + target.ref_unchecked().as_ref().and_then(|target| { + // Avoid get_blockid() on blockref. Can be dangling on dead blocks, + // and the iseq housing the block already naturally handles it. + if target.get_block().is_some() { + None + } else { + Some(target.get_blockid().iseq) + } + }) + }; + + if let Some(target_iseq) = target_iseq { + unsafe { rb_gc_mark_movable(target_iseq.into()) }; } } + }); - // Walk over references to objects in generated code. - for offset in &block.gc_obj_offsets { - let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(); + // Mark references to objects in generated code. + // Skip for dead blocks since they shouldn't run. + if !dead { + for offset in block.gc_obj_offsets.iter() { + let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(cb); // Creating an unaligned pointer is well defined unlike in C. let value_address = value_address as *const VALUE; @@ -725,17 +1994,24 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) { } } -/// GC callback for updating GC objects in the the per-iseq payload. +/// GC callback for updating GC objects in the per-iseq payload. /// This is a mirror of [rb_yjit_iseq_mark]. #[no_mangle] -pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) { +pub extern "C" fn rb_yjit_iseq_update_references(iseq: IseqPtr) { + let payload = unsafe { rb_iseq_get_yjit_payload(iseq) }; let payload = if payload.is_null() { // Nothing to update. return; } else { - // SAFETY: It looks like the GC takes the VM lock while updating references - // so we should be satisfying aliasing rules here. - unsafe { &*(payload as *const IseqPayload) } + // SAFETY: The GC takes the VM lock while marking, which + // we assert, so we should be synchronized and data race free. + // + // For aliasing, having the VM lock hopefully also implies that no one + // else has an overlapping &mut IseqPayload. + unsafe { + rb_assert_holding_vm_lock(); + &*(payload as *const IseqPayload) + } }; // Evict other threads from generated code since we are about to patch them. 
@@ -746,29 +2022,66 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) { let cb = CodegenGlobals::get_inline_cb(); for versions in &payload.version_map { - for block in versions { - let mut block = block.borrow_mut(); + for version in versions { + // SAFETY: all blocks inside version_map are initialized + let block = unsafe { version.as_ref() }; + block_update_references(block, cb, false); + } + } + // Update dead blocks, since there could be stubs pointing at them + for blockref in &payload.dead_blocks { + // SAFETY: dead blocks come from version_map, which only have initialized blocks + let block = unsafe { blockref.as_ref() }; + block_update_references(block, cb, true); + } - block.blockid.iseq = unsafe { rb_gc_location(block.blockid.iseq.into()) }.as_iseq(); + return; - // Update method entry dependencies - for cme_dep in &mut block.cme_dependencies { - *cme_dep = unsafe { rb_gc_location((*cme_dep).into()) }.as_cme(); - } + fn block_update_references(block: &Block, cb: &mut CodeBlock, dead: bool) { + block.iseq.set(unsafe { rb_gc_location(block.iseq.get().into()) }.as_iseq()); + + // Update method entry dependencies + for cme_dep in block.cme_dependencies.iter() { + let cur_cme: VALUE = cme_dep.get().into(); + let new_cme = unsafe { rb_gc_location(cur_cme) }.as_cme(); + cme_dep.set(new_cme); + } + + // Update outgoing branch entries + block.outgoing.for_each(|branch| { + let branch = unsafe { branch.as_ref() }; + for target in branch.targets.iter() { + // SAFETY: no mutation inside unsafe + let current_iseq = unsafe { + target.ref_unchecked().as_ref().and_then(|target| { + // Avoid get_blockid() on blockref. Can be dangling on dead blocks, + // and the iseq housing the block already naturally handles it. + if target.get_block().is_some() { + None + } else { + Some(target.get_blockid().iseq) + } + }) + }; - // Update outgoing branch entries - for branch in &block.outgoing { - let mut branch = branch.borrow_mut(); - for target in branch.targets.iter_mut().flatten() { - target.set_iseq(unsafe { rb_gc_location(target.get_blockid().iseq.into()) }.as_iseq()); + if let Some(current_iseq) = current_iseq { + let updated_iseq = unsafe { rb_gc_location(current_iseq.into()) } + .as_iseq(); + // SAFETY: the Cell::set is not on the reference given out + // by ref_unchecked. + unsafe { target.ref_unchecked().as_ref().unwrap().set_iseq(updated_iseq) }; } } + }); - // Walk over references to objects in generated code. - for offset in &block.gc_obj_offsets { + // Update references to objects in generated code. + // Skip for dead blocks since they shouldn't run and + // so there is no potential of writing over invalidation jumps + if !dead { + for offset in block.gc_obj_offsets.iter() { let offset_to_value = offset.as_usize(); let value_code_ptr = cb.get_ptr(offset_to_value); - let value_ptr: *const u8 = value_code_ptr.raw_ptr(); + let value_ptr: *const u8 = value_code_ptr.raw_ptr(cb); // Creating an unaligned pointer is well defined unlike in C. let value_ptr = value_ptr as *mut VALUE; @@ -778,22 +2091,42 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) { // Only write when the VALUE moves, to be copy-on-write friendly. 
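// ---- Editor's sketch (not part of the diff) ----
// The unaligned read/write access pattern used when patching object references embedded in
// generated machine code (see the gc_obj_offsets loops nearby and the
// value_ptr.write_unaligned() call just below). Instruction streams give no alignment
// guarantees, so a plain aligned *ptr = val would be undefined behavior; patch_u64 is a
// hypothetical stand-alone illustration.
fn patch_u64(code: &mut [u8], offset: usize, new_value: u64) -> u64 {
    assert!(offset + 8 <= code.len());
    let ptr = unsafe { code.as_mut_ptr().add(offset) } as *mut u64;
    // SAFETY: the pointer stays inside `code` (checked above) but may be unaligned.
    unsafe {
        let old = ptr.read_unaligned();
        if old != new_value {
            // Only write when the value actually changed, to stay copy-on-write friendly.
            ptr.write_unaligned(new_value);
        }
        old
    }
}
// ---- end of editor's sketch ----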
if new_addr != object { - for (byte_idx, &byte) in new_addr.as_u64().to_le_bytes().iter().enumerate() { - let byte_code_ptr = value_code_ptr.add_bytes(byte_idx); - cb.write_mem(byte_code_ptr, byte) - .expect("patching existing code should be within bounds"); - } + // SAFETY: Since we already set code memory writable before the compacting phase, + // we can use raw memory accesses directly. + unsafe { value_ptr.write_unaligned(new_addr); } } } } + } +} - // Note that we would have returned already if YJIT is off. - cb.mark_all_executable(); +/// Mark all code memory as writable. +/// This function is useful for garbage collectors that update references in JIT-compiled code in +/// bulk. +#[no_mangle] +pub extern "C" fn rb_yjit_mark_all_writeable() { + if CodegenGlobals::has_instance() { + CodegenGlobals::get_inline_cb().mark_all_writeable(); + + CodegenGlobals::get_outlined_cb() + .unwrap() + .mark_all_writeable(); + } +} + +/// Mark all code memory as executable. +/// This function is useful for garbage collectors that update references in JIT-compiled code in +/// bulk. +#[no_mangle] +pub extern "C" fn rb_yjit_mark_all_executable() { + if CodegenGlobals::has_instance() { + CodegenGlobals::get_inline_cb().mark_all_executable(); - CodegenGlobals::get_outlined_cb() - .unwrap() - .mark_all_executable(); + CodegenGlobals::get_outlined_cb() + .unwrap() + .mark_all_executable(); + } } /// Get all blocks for a particular place in an iseq. @@ -833,15 +2166,28 @@ pub fn take_version_list(blockid: BlockId) -> VersionList { } } -/// Count the number of block versions matching a given blockid -fn get_num_versions(blockid: BlockId) -> usize { +/// Count the number of block versions that match a given BlockId and part of a Context +fn get_num_versions(blockid: BlockId, ctx: &Context) -> usize { let insn_idx = blockid.idx.as_usize(); match get_iseq_payload(blockid.iseq) { + + // FIXME: this counting logic is going to be expensive. + // We should avoid it if possible + Some(payload) => { payload .version_map .get(insn_idx) - .map(|versions| versions.len()) + .map(|versions| { + versions.iter().filter(|&&version| { + let version_ctx = Context::decode(unsafe { version.as_ref() }.ctx); + // Inline versions are counted separately towards MAX_INLINE_VERSIONS. + version_ctx.inline() == ctx.inline() && + // find_block_versions() finds only blocks with compatible reg_mapping, + // so count only versions with compatible reg_mapping. 
+ version_ctx.reg_mapping == ctx.reg_mapping + }).count() + }) .unwrap_or(0) } None => 0, @@ -862,7 +2208,7 @@ pub fn get_or_create_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> { // For each version at this instruction index for version in version_list { // Clone the block ref and add it to the list - blocks.push(version.clone()); + blocks.push(*version); } } @@ -872,90 +2218,151 @@ pub fn get_or_create_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> { /// Retrieve a basic block version for an (iseq, idx) tuple /// This will return None if no version is found fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> { - let versions = match get_version_list(blockid) { - Some(versions) => versions, - None => return None, - }; + let versions = get_version_list(blockid)?; // Best match found let mut best_version: Option<BlockRef> = None; let mut best_diff = usize::MAX; // For each version matching the blockid - for blockref in versions.iter_mut() { - let block = blockref.borrow(); - let diff = ctx.diff(&block.ctx); + for blockref in versions.iter() { + let block = unsafe { blockref.as_ref() }; + let block_ctx = Context::decode(block.ctx); // Note that we always prefer the first matching // version found because of inline-cache chains - if diff < best_diff { - best_version = Some(blockref.clone()); - best_diff = diff; + match ctx.diff(&block_ctx) { + TypeDiff::Compatible(diff) if diff < best_diff => { + best_version = Some(*blockref); + best_diff = diff; + } + _ => {} } } - // If greedy versioning is enabled - if get_option!(greedy_versioning) { - // If we're below the version limit, don't settle for an imperfect match - if versions.len() + 1 < get_option!(max_versions) && best_diff > 0 { - return None; + return best_version; +} + +/// Find the closest RegMapping among ones that have already been compiled. +pub fn find_most_compatible_reg_mapping(blockid: BlockId, ctx: &Context) -> Option<RegMapping> { + let versions = get_version_list(blockid)?; + + // Best match found + let mut best_mapping: Option<RegMapping> = None; + let mut best_diff = usize::MAX; + + // For each version matching the blockid + for blockref in versions.iter() { + let block = unsafe { blockref.as_ref() }; + let block_ctx = Context::decode(block.ctx); + + // Discover the best block that is compatible if we load/spill registers + match ctx.diff_allowing_reg_mismatch(&block_ctx) { + TypeDiff::Compatible(diff) if diff < best_diff => { + best_mapping = Some(block_ctx.get_reg_mapping()); + best_diff = diff; + } + _ => {} } } - return best_version; + best_mapping } +/// Allow inlining a Block up to MAX_INLINE_VERSIONS times. +const MAX_INLINE_VERSIONS: usize = 1000; + /// Produce a generic context when the block version limit is hit for a blockid pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context { // Guard chains implement limits separately, do nothing - if ctx.chain_depth > 0 { - return ctx.clone(); + if ctx.get_chain_depth() > 0 { + return *ctx; } + let next_versions = get_num_versions(blockid, ctx) + 1; + let max_versions = if ctx.inline() { + MAX_INLINE_VERSIONS + } else { + get_option!(max_versions) + }; + // If this block version we're about to add will hit the version limit - if get_num_versions(blockid) + 1 >= get_option!(max_versions) { + if next_versions >= max_versions { // Produce a generic context that stores no type information, // but still respects the stack_size and sp_offset constraints. // This new context will then match all future requests. 
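// ---- Editor's sketch (not part of the diff) ----
// The "smallest compatible difference wins" selection used by find_block_version() and
// find_most_compatible_reg_mapping() above. Diff and score() are simplified stand-ins for
// TypeDiff and Context::diff(); the real compatibility rules are much richer. Like the code
// above, ties keep the first candidate found, which favors existing inline-cache chains.
#[derive(PartialEq, Eq, Debug)]
enum Diff {
    Compatible(usize), // usable as-is; the payload is how much type information we give up
    Incompatible,      // cannot branch to this version at all
}

fn score(want: u8, have: u8) -> Diff {
    if want == have {
        Diff::Compatible(0)
    } else if have == 0 {
        // A version compiled with no assumptions (0) accepts anything, at some cost.
        Diff::Compatible(1)
    } else {
        Diff::Incompatible
    }
}

// Pick the index of the compatible candidate with the lowest cost, if any.
fn best_version(want: u8, candidates: &[u8]) -> Option<usize> {
    let mut best: Option<(usize, usize)> = None; // (index, cost)
    for (i, &have) in candidates.iter().enumerate() {
        if let Diff::Compatible(cost) = score(want, have) {
            if best.map_or(true, |(_, c)| cost < c) {
                best = Some((i, cost));
            }
        }
    }
    best.map(|(i, _)| i)
}
// ---- end of editor's sketch ----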
- let mut generic_ctx = Context::default(); - generic_ctx.stack_size = ctx.stack_size; - generic_ctx.sp_offset = ctx.sp_offset; + let generic_ctx = ctx.get_generic_ctx(); + + if cfg!(debug_assertions) { + let mut ctx = ctx.clone(); + if ctx.inline() { + // Suppress TypeDiff::Incompatible from ctx.diff(). We return TypeDiff::Incompatible + // to keep inlining blocks until we hit the limit, but it's safe to give up inlining. + ctx.inline_block = None; + assert!(generic_ctx.inline_block == None); + } - debug_assert_ne!( - usize::MAX, - ctx.diff(&generic_ctx), - "should substitute a compatible context", - ); + assert_ne!( + TypeDiff::Incompatible, + ctx.diff(&generic_ctx), + "should substitute a compatible context", + ); + } return generic_ctx; } + if ctx.inline() { + incr_counter_to!(max_inline_versions, next_versions); + } - return ctx.clone(); + return *ctx; } -/// Keep track of a block version. Block should be fully constructed. -/// Uses `cb` for running write barriers. -fn add_block_version(blockref: &BlockRef, cb: &CodeBlock) { - let block = blockref.borrow(); +/// Install a block version into its [IseqPayload], letting the GC track its +/// lifetime, and allowing it to be considered for use for other +/// blocks we might generate. Uses `cb` for running write barriers. +/// +/// # Safety +/// +/// The block must be fully initialized. Its incoming and outgoing edges, +/// if there are any, must point to initialized blocks, too. +/// +/// Note that the block might gain edges after this function returns, +/// as can happen during [gen_block_series]. Initialized here doesn't mean +/// ready to be consumed or that the machine code tracked by the block is +/// ready to be run. +/// +/// Due to this transient state where a block is tracked by the GC by +/// being inside an [IseqPayload] but not ready to be executed, it's +/// generally unsound to call any Ruby methods during codegen. That has +/// the potential to run blocks which are not ready. +unsafe fn add_block_version(blockref: BlockRef, cb: &CodeBlock) { + // SAFETY: caller ensures initialization + let block = unsafe { blockref.as_ref() }; // Function entry blocks must have stack size 0 - assert!(!(block.blockid.idx == 0 && block.ctx.stack_size > 0)); + debug_assert!(!(block.iseq_range.start == 0 && Context::decode(block.ctx).stack_size > 0)); + + let version_list = get_or_create_version_list(block.get_blockid()); - let version_list = get_or_create_version_list(block.blockid); + // If this the first block being compiled with this block id + if version_list.len() == 0 { + incr_counter!(compiled_blockid_count); + } - version_list.push(blockref.clone()); + version_list.push(blockref); version_list.shrink_to_fit(); // By writing the new block to the iseq, the iseq now // contains new references to Ruby objects. Run write barriers. - let iseq: VALUE = block.blockid.iseq.into(); - for &dep in block.iter_cme_deps() { + let iseq: VALUE = block.iseq.get().into(); + for dep in block.iter_cme_deps() { obj_written!(iseq, dep.into()); } // Run write barriers for all objects in generated code. - for offset in &block.gc_obj_offsets { - let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(); + for offset in block.gc_obj_offsets.iter() { + let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(cb); // Creating an unaligned pointer is well defined unlike in C. 
let value_address: *const VALUE = value_address.cast(); @@ -964,18 +2371,21 @@ fn add_block_version(blockref: &BlockRef, cb: &CodeBlock) { } incr_counter!(compiled_block_count); + if Context::decode(block.ctx).inline() { + incr_counter!(inline_block_count); + } // Mark code pages for code GC - let iseq_payload = get_iseq_payload(block.blockid.iseq).unwrap(); - for page in cb.addrs_to_pages(block.start_addr.unwrap(), block.end_addr.unwrap()) { + let iseq_payload = get_iseq_payload(block.iseq.get()).unwrap(); + for page in cb.addrs_to_pages(block.start_addr, block.end_addr.get()) { iseq_payload.pages.insert(page); } } /// Remove a block version from the version map of its parent ISEQ fn remove_block_version(blockref: &BlockRef) { - let block = blockref.borrow(); - let version_list = match get_version_list(block.blockid) { + let block = unsafe { blockref.as_ref() }; + let version_list = match get_version_list(block.get_blockid()) { Some(version_list) => version_list, None => return, }; @@ -984,241 +2394,228 @@ fn remove_block_version(blockref: &BlockRef) { version_list.retain(|other| blockref != other); } -//=========================================================================== -// I put the implementation of traits for core.rs types below -// We can move these closer to the above structs later if we want. -//=========================================================================== +impl<'a> JITState<'a> { + // Finish compiling and turn a jit state into a block + // note that the block is still not in shape. + pub fn into_block(self, end_insn_idx: IseqIdx, start_addr: CodePtr, end_addr: CodePtr, gc_obj_offsets: Vec<u32>) -> BlockRef { + // Allocate the block and get its pointer + let blockref: *mut MaybeUninit<Block> = Box::into_raw(Box::new(MaybeUninit::uninit())); + + incr_counter_by!(num_gc_obj_refs, gc_obj_offsets.len()); + + let ctx = Context::encode(&self.get_starting_ctx()); + + // Make the new block + let block = MaybeUninit::new(Block { + start_addr, + iseq: Cell::new(self.get_iseq()), + iseq_range: self.get_starting_insn_idx()..end_insn_idx, + ctx, + end_addr: Cell::new(end_addr), + incoming: MutableBranchList(Cell::default()), + gc_obj_offsets: gc_obj_offsets.into_boxed_slice(), + entry_exit: self.get_block_entry_exit(), + cme_dependencies: self.method_lookup_assumptions.into_iter().map(Cell::new).collect(), + // Pending branches => actual branches + outgoing: MutableBranchList(Cell::new(self.pending_outgoing.into_iter().map(|pending_out| { + let pending_out = Rc::try_unwrap(pending_out) + .unwrap_or_else(|rc| panic!( + "PendingBranchRef should be unique when ready to construct a Block. \ + strong={} weak={}", Rc::strong_count(&rc), Rc::weak_count(&rc))); + pending_out.into_branch(NonNull::new(blockref as *mut Block).expect("no null from Box")) + }).collect())) + }); + // Initialize it on the heap + // SAFETY: allocated with Box above + unsafe { ptr::write(blockref, block) }; -impl Block { - pub fn new(blockid: BlockId, ctx: &Context) -> BlockRef { - let block = Block { - blockid, - end_idx: 0, - ctx: ctx.clone(), - start_addr: None, - end_addr: None, - incoming: Vec::new(), - outgoing: Vec::new(), - gc_obj_offsets: Vec::new(), - cme_dependencies: Vec::new(), - entry_exit: None, - }; + // Block is initialized now. Note that MaybeUninit<T> has the same layout as T. 
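// ---- Editor's sketch (not part of the diff) ----
// The Rc::try_unwrap() step in into_block() below: while a block is being compiled, pending
// branches are shared through Rc; once the block is finalized there must be exactly one owner
// left, and try_unwrap() both asserts and exploits that uniqueness. finalize() is a
// hypothetical stand-in using String in place of PendingBranch.
use std::rc::Rc;

fn finalize(pending: Rc<String>) -> String {
    Rc::try_unwrap(pending).unwrap_or_else(|rc| {
        panic!(
            "pending value should be uniquely owned here; strong={} weak={}",
            Rc::strong_count(&rc),
            Rc::weak_count(&rc)
        )
    })
}
// ---- end of editor's sketch ----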
+ let blockref = NonNull::new(blockref as *mut Block).expect("no null from Box"); + + // Track all the assumptions the block makes as invariants + if self.block_assumes_single_ractor { + track_single_ractor_assumption(blockref); + } + for bop in self.bop_assumptions { + track_bop_assumption(blockref, bop); + } + // SAFETY: just allocated it above + for cme in unsafe { blockref.as_ref() }.cme_dependencies.iter() { + track_method_lookup_stability_assumption(blockref, cme.get()); + } + if let Some(idlist) = self.stable_constant_names_assumption { + track_stable_constant_names_assumption(blockref, idlist); + } + for klass in self.no_singleton_class_assumptions { + track_no_singleton_class_assumption(blockref, klass); + } + if self.no_ep_escape { + track_no_ep_escape_assumption(blockref, self.iseq); + } - // Wrap the block in a reference counted refcell - // so that the block ownership can be shared - BlockRef::new(Rc::new(RefCell::new(block))) + blockref } +} +impl Block { pub fn get_blockid(&self) -> BlockId { - self.blockid + BlockId { iseq: self.iseq.get(), idx: self.iseq_range.start } } - pub fn get_end_idx(&self) -> u32 { - self.end_idx + pub fn get_end_idx(&self) -> IseqIdx { + self.iseq_range.end } - pub fn get_ctx(&self) -> Context { - self.ctx.clone() + pub fn get_ctx_count(&self) -> usize { + let mut count = 1; // block.ctx + self.outgoing.for_each(|branch| { + // SAFETY: &self implies it's initialized + count += unsafe { branch.as_ref() }.get_stub_count(); + }); + count } #[allow(unused)] - pub fn get_start_addr(&self) -> Option<CodePtr> { + pub fn get_start_addr(&self) -> CodePtr { self.start_addr } #[allow(unused)] - pub fn get_end_addr(&self) -> Option<CodePtr> { - self.end_addr + pub fn get_end_addr(&self) -> CodePtr { + self.end_addr.get() } /// Get an immutable iterator over cme dependencies - pub fn iter_cme_deps(&self) -> std::slice::Iter<'_, CmePtr> { - self.cme_dependencies.iter() - } - - /// Set the starting address in the generated code for the block - /// This can be done only once for a block - pub fn set_start_addr(&mut self, addr: CodePtr) { - assert!(self.start_addr.is_none()); - self.start_addr = Some(addr); + pub fn iter_cme_deps(&self) -> impl Iterator<Item = CmePtr> + '_ { + self.cme_dependencies.iter().map(Cell::get) } - /// Set the end address in the generated for the block - /// This can be done only once for a block - pub fn set_end_addr(&mut self, addr: CodePtr) { - // The end address can only be set after the start address is set - assert!(self.start_addr.is_some()); - - // TODO: assert constraint that blocks can shrink but not grow in length - self.end_addr = Some(addr); + // Push an incoming branch ref and shrink the vector + fn push_incoming(&self, branch: BranchRef) { + self.incoming.push(branch); } - /// Set the index of the last instruction in the block - /// This can be done only once for a block - pub fn set_end_idx(&mut self, end_idx: u32) { - assert!(self.end_idx == 0); - self.end_idx = end_idx; + // Compute the size of the block code + pub fn code_size(&self) -> usize { + (self.end_addr.get().as_offset() - self.start_addr.as_offset()).try_into().unwrap() } +} - pub fn add_gc_obj_offsets(self: &mut Block, gc_offsets: Vec<u32>) { - for offset in gc_offsets { - self.gc_obj_offsets.push(offset); - incr_counter!(num_gc_obj_refs); - } - self.gc_obj_offsets.shrink_to_fit(); +impl Context { + pub fn get_stack_size(&self) -> u8 { + self.stack_size } - /// Instantiate a new CmeDependency struct and add it to the list of - /// dependencies for this block. 
- pub fn add_cme_dependency(&mut self, callee_cme: CmePtr) { - self.cme_dependencies.push(callee_cme); - self.cme_dependencies.shrink_to_fit(); + pub fn set_stack_size(&mut self, stack_size: u8) { + self.stack_size = stack_size; } - // Push an incoming branch ref and shrink the vector - fn push_incoming(&mut self, branch: BranchRef) { - self.incoming.push(branch); - self.incoming.shrink_to_fit(); + /// Create a new Context that is compatible with self but doesn't have type information. + pub fn get_generic_ctx(&self) -> Context { + let mut generic_ctx = Context::default(); + generic_ctx.stack_size = self.stack_size; + generic_ctx.sp_offset = self.sp_offset; + generic_ctx.reg_mapping = self.reg_mapping; + if self.is_return_landing() { + generic_ctx.set_as_return_landing(); + } + if self.is_deferred() { + generic_ctx.mark_as_deferred(); + } + generic_ctx } - // Push an outgoing branch ref and shrink the vector - fn push_outgoing(&mut self, branch: BranchRef) { - self.outgoing.push(branch); - self.outgoing.shrink_to_fit(); + /// Create a new Context instance with a given stack_size and sp_offset adjusted + /// accordingly. This is useful when you want to virtually rewind a stack_size for + /// generating a side exit while considering past sp_offset changes on gen_save_sp. + pub fn with_stack_size(&self, stack_size: u8) -> Context { + let mut ctx = *self; + ctx.sp_offset -= (ctx.get_stack_size() as isize - stack_size as isize) as i8; + ctx.stack_size = stack_size; + ctx } - // Compute the size of the block code - pub fn code_size(&self) -> usize { - (self.end_addr.unwrap().raw_ptr() as usize) - (self.start_addr.unwrap().raw_ptr() as usize) + pub fn get_sp_offset(&self) -> i8 { + self.sp_offset } -} -impl Context { - pub fn get_stack_size(&self) -> u16 { - self.stack_size + pub fn set_sp_offset(&mut self, offset: i8) { + self.sp_offset = offset; } - pub fn get_sp_offset(&self) -> i16 { - self.sp_offset + pub fn get_reg_mapping(&self) -> RegMapping { + self.reg_mapping } - pub fn set_sp_offset(&mut self, offset: i16) { - self.sp_offset = offset; + pub fn set_reg_mapping(&mut self, reg_mapping: RegMapping) { + self.reg_mapping = reg_mapping; } pub fn get_chain_depth(&self) -> u8 { self.chain_depth } - pub fn reset_chain_depth(&mut self) { + pub fn reset_chain_depth_and_defer(&mut self) { self.chain_depth = 0; + self.is_deferred = false; } pub fn increment_chain_depth(&mut self) { + if self.get_chain_depth() == CHAIN_DEPTH_MAX { + panic!("max block version chain depth reached!"); + } self.chain_depth += 1; } - /// Get an operand for the adjusted stack pointer address - pub fn sp_opnd(&self, offset_bytes: isize) -> Opnd { - let offset = ((self.sp_offset as isize) * (SIZEOF_VALUE as isize)) + offset_bytes; - let offset = offset as i32; - return Opnd::mem(64, SP, offset); + pub fn set_as_return_landing(&mut self) { + self.is_return_landing = true; } - /// Push one new value on the temp stack with an explicit mapping - /// Return a pointer to the new stack top - pub fn stack_push_mapping(&mut self, (mapping, temp_type): (TempMapping, Type)) -> Opnd { - // If type propagation is disabled, store no types - if get_option!(no_type_prop) { - return self.stack_push_mapping((mapping, Type::Unknown)); - } - - let stack_size: usize = self.stack_size.into(); - - // Keep track of the type and mapping of the value - if stack_size < MAX_TEMP_TYPES { - self.temp_mapping[stack_size] = mapping; - self.temp_types[stack_size] = temp_type; - - if let MapToLocal(idx) = mapping { - assert!((idx as usize) < 
MAX_LOCAL_TYPES); - } - } - - self.stack_size += 1; - self.sp_offset += 1; - - // SP points just above the topmost value - let offset = ((self.sp_offset as i32) - 1) * (SIZEOF_VALUE as i32); - return Opnd::mem(64, SP, offset); + pub fn clear_return_landing(&mut self) { + self.is_return_landing = false; } - /// Push one new value on the temp stack - /// Return a pointer to the new stack top - pub fn stack_push(&mut self, val_type: Type) -> Opnd { - return self.stack_push_mapping((MapToStack, val_type)); + pub fn is_return_landing(&self) -> bool { + self.is_return_landing } - /// Push the self value on the stack - pub fn stack_push_self(&mut self) -> Opnd { - return self.stack_push_mapping((MapToSelf, Type::Unknown)); + pub fn mark_as_deferred(&mut self) { + self.is_deferred = true; } - /// Push a local variable on the stack - pub fn stack_push_local(&mut self, local_idx: usize) -> Opnd { - if local_idx >= MAX_LOCAL_TYPES { - return self.stack_push(Type::Unknown); - } - - return self.stack_push_mapping((MapToLocal(local_idx as u8), Type::Unknown)); + pub fn is_deferred(&self) -> bool { + self.is_deferred } - // Pop N values off the stack - // Return a pointer to the stack top before the pop operation - pub fn stack_pop(&mut self, n: usize) -> Opnd { - assert!(n <= self.stack_size.into()); - - // SP points just above the topmost value - let offset = ((self.sp_offset as i32) - 1) * (SIZEOF_VALUE as i32); - let top = Opnd::mem(64, SP, offset); - - // Clear the types of the popped values - for i in 0..n { - let idx: usize = (self.stack_size as usize) - i - 1; - - if idx < MAX_TEMP_TYPES { - self.temp_types[idx] = Type::Unknown; - self.temp_mapping[idx] = MapToStack; - } - } - - self.stack_size -= n as u16; - self.sp_offset -= n as i16; - - return top; + /// Get an operand for the adjusted stack pointer address + pub fn sp_opnd(&self, offset: i32) -> Opnd { + let offset = (self.sp_offset as i32 + offset) * SIZEOF_VALUE_I32; + return Opnd::mem(64, SP, offset); } - pub fn shift_stack(&mut self, argc: usize) { - assert!(argc < self.stack_size.into()); - - let method_name_index = (self.stack_size - argc as u16 - 1) as usize; - - for i in method_name_index..(self.stack_size - 1) as usize { + /// Get an operand for the adjusted environment pointer address using SP register. + /// This is valid only when a Binding object hasn't been created for the frame. + pub fn ep_opnd(&self, offset: i32) -> Opnd { + let ep_offset = self.get_stack_size() as i32 + 1; + self.sp_opnd(-ep_offset + offset) + } - if i + 1 < MAX_TEMP_TYPES { - self.temp_types[i] = self.temp_types[i + 1]; - self.temp_mapping[i] = self.temp_mapping[i + 1]; - } + /// Start using a register for a given stack temp or a local. + pub fn alloc_reg(&mut self, opnd: RegOpnd) { + let mut reg_mapping = self.get_reg_mapping(); + if reg_mapping.alloc_reg(opnd) { + self.set_reg_mapping(reg_mapping); } - self.stack_pop(1); } - /// Get an operand pointing to a slot on the temp stack - pub fn stack_opnd(&self, idx: i32) -> Opnd { - // SP points just above the topmost value - let offset = ((self.sp_offset as i32) - 1 - idx) * (SIZEOF_VALUE as i32); - let opnd = Opnd::mem(64, SP, offset); - return opnd; + /// Stop using a register for a given stack temp or a local. + /// This allows us to reuse the register for a value that we know is dead + /// and will no longer be used (e.g. popped stack temp). 
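// ---- Editor's sketch (not part of the diff) ----
// The pointer arithmetic behind sp_opnd()/ep_opnd() above. Offsets are counted in VALUE-sized
// slots relative to the SP register, and the EP sits one slot below the bottom of the
// temporary stack. SIZEOF_VALUE = 8 is an assumption (64-bit VALUE); the real code uses
// SIZEOF_VALUE_I32.
const SIZEOF_VALUE: i32 = 8;

// Byte displacement from SP for a slot `offset` slots away from the virtual stack pointer,
// which itself sits `sp_offset` slots above SP.
fn sp_byte_offset(sp_offset: i8, offset: i32) -> i32 {
    (sp_offset as i32 + offset) * SIZEOF_VALUE
}

// Byte displacement from SP for a slot relative to the environment pointer (EP),
// valid only while no Binding object has materialized the environment elsewhere.
fn ep_byte_offset(sp_offset: i8, stack_size: u8, offset: i32) -> i32 {
    let ep_offset = stack_size as i32 + 1; // EP is one slot below the bottom of the temps
    sp_byte_offset(sp_offset, -ep_offset + offset)
}
// ---- end of editor's sketch ----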
+ pub fn dealloc_reg(&mut self, opnd: RegOpnd) { + let mut reg_mapping = self.get_reg_mapping(); + if reg_mapping.dealloc_reg(opnd) { + self.set_reg_mapping(reg_mapping); + } } /// Get the type of an instruction operand @@ -1230,18 +2627,18 @@ impl Context { let stack_idx: usize = (self.stack_size - 1 - idx).into(); // If outside of tracked range, do nothing - if stack_idx >= MAX_TEMP_TYPES { + if stack_idx >= MAX_CTX_TEMPS { return Type::Unknown; } - let mapping = self.temp_mapping[stack_idx]; + let mapping = self.get_temp_mapping(stack_idx); match mapping { MapToSelf => self.self_type, - MapToStack => self.temp_types[(self.stack_size - 1 - idx) as usize], - MapToLocal(idx) => { - assert!((idx as usize) < MAX_LOCAL_TYPES); - return self.local_types[idx as usize]; + MapToStack(temp_type) => temp_type, + MapToLocal(local_idx) => { + assert!((local_idx as usize) < MAX_CTX_LOCALS); + return self.get_local_type(local_idx.into()); } } } @@ -1249,8 +2646,24 @@ impl Context { } /// Get the currently tracked type for a local variable - pub fn get_local_type(&self, idx: usize) -> Type { - *self.local_types.get(idx).unwrap_or(&Type::Unknown) + pub fn get_local_type(&self, local_idx: usize) -> Type { + if local_idx >= MAX_CTX_LOCALS { + Type::Unknown + } else { + self.local_types[local_idx] + } + } + + /// Get the current temp mapping for a given stack slot + fn get_temp_mapping(&self, temp_idx: usize) -> TempMapping { + assert!(temp_idx < MAX_CTX_TEMPS); + self.temp_mapping[temp_idx] + } + + /// Set the current temp mapping for a given stack slot + fn set_temp_mapping(&mut self, temp_idx: usize, mapping: TempMapping) { + assert!(temp_idx < MAX_CTX_TEMPS); + self.temp_mapping[temp_idx] = mapping; } /// Upgrade (or "learn") the type of an instruction operand @@ -1270,19 +2683,27 @@ impl Context { let stack_idx = (self.stack_size - 1 - idx) as usize; // If outside of tracked range, do nothing - if stack_idx >= MAX_TEMP_TYPES { + if stack_idx >= MAX_CTX_TEMPS { return; } - let mapping = self.temp_mapping[stack_idx]; + let mapping = self.get_temp_mapping(stack_idx); match mapping { MapToSelf => self.self_type.upgrade(opnd_type), - MapToStack => self.temp_types[stack_idx].upgrade(opnd_type), - MapToLocal(idx) => { - let idx = idx as usize; - assert!(idx < MAX_LOCAL_TYPES); - self.local_types[idx].upgrade(opnd_type); + MapToStack(mut temp_type) => { + temp_type.upgrade(opnd_type); + self.set_temp_mapping(stack_idx, TempMapping::MapToStack(temp_type)); + } + MapToLocal(local_idx) => { + let idx = local_idx as usize; + assert!(idx < MAX_CTX_LOCALS); + let mut new_type = self.get_local_type(idx); + new_type.upgrade(opnd_type); + self.set_local_type(idx, new_type); + // Re-attach MapToLocal for this StackOpnd(idx). set_local_type() detaches + // all MapToLocal mappings, including the one we're upgrading here. + self.set_opnd_mapping(opnd, mapping); } } } @@ -1294,29 +2715,29 @@ impl Context { This is can be used with stack_push_mapping or set_opnd_mapping to copy a stack value's type while maintaining the mapping. 
*/ - pub fn get_opnd_mapping(&self, opnd: YARVOpnd) -> (TempMapping, Type) { + pub fn get_opnd_mapping(&self, opnd: YARVOpnd) -> TempMapping { let opnd_type = self.get_opnd_type(opnd); match opnd { - SelfOpnd => (MapToSelf, opnd_type), + SelfOpnd => TempMapping::MapToSelf, StackOpnd(idx) => { assert!(idx < self.stack_size); let stack_idx = (self.stack_size - 1 - idx) as usize; - if stack_idx < MAX_TEMP_TYPES { - (self.temp_mapping[stack_idx], opnd_type) + if stack_idx < MAX_CTX_TEMPS { + self.get_temp_mapping(stack_idx) } else { // We can't know the source of this stack operand, so we assume it is // a stack-only temporary. type will be UNKNOWN assert!(opnd_type == Type::Unknown); - (MapToStack, opnd_type) + TempMapping::MapToStack(opnd_type) } } } } /// Overwrite both the type and mapping of a stack operand. - pub fn set_opnd_mapping(&mut self, opnd: YARVOpnd, (mapping, opnd_type): (TempMapping, Type)) { + pub fn set_opnd_mapping(&mut self, opnd: YARVOpnd, mapping: TempMapping) { match opnd { SelfOpnd => unreachable!("self always maps to self"), StackOpnd(idx) => { @@ -1329,48 +2750,46 @@ impl Context { } // If outside of tracked range, do nothing - if stack_idx >= MAX_TEMP_TYPES { + if stack_idx >= MAX_CTX_TEMPS { return; } - self.temp_mapping[stack_idx] = mapping; - - // Only used when mapping == MAP_STACK - self.temp_types[stack_idx] = opnd_type; + self.set_temp_mapping(stack_idx, mapping); } } } /// Set the type of a local variable pub fn set_local_type(&mut self, local_idx: usize, local_type: Type) { - let ctx = self; - // If type propagation is disabled, store no types if get_option!(no_type_prop) { return; } - if local_idx >= MAX_LOCAL_TYPES { - return; + if local_idx >= MAX_CTX_LOCALS { + return } // If any values on the stack map to this local we must detach them - for (i, mapping) in ctx.temp_mapping.iter_mut().enumerate() { - *mapping = match *mapping { - MapToStack => MapToStack, - MapToSelf => MapToSelf, + for mapping_idx in 0..MAX_CTX_TEMPS { + let mapping = self.get_temp_mapping(mapping_idx); + let tm = match mapping { + MapToStack(_) => mapping, + MapToSelf => mapping, MapToLocal(idx) => { if idx as usize == local_idx { - ctx.temp_types[i] = ctx.local_types[idx as usize]; - MapToStack + let local_type = self.get_local_type(local_idx); + TempMapping::MapToStack(local_type) } else { - MapToLocal(idx) + TempMapping::MapToLocal(idx) } } - } + }; + self.set_temp_mapping(mapping_idx, tm); } - ctx.local_types[local_idx] = local_type; + // Update the type + self.local_types[local_idx] = local_type; } /// Erase local variable type information @@ -1378,99 +2797,273 @@ impl Context { pub fn clear_local_types(&mut self) { // When clearing local types we must detach any stack mappings to those // locals. Even if local values may have changed, stack values will not. 
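The detach rule described in the comment above, which both set_local_type() and clear_local_types() below follow, can be condensed into a small sketch. The types here are illustrative stand-ins, not YJIT's actual TempMapping/Type definitions: before a local's tracked type is changed or cleared, any stack slot mapped to that local is rewritten as a plain stack mapping that snapshots the local's current type.

    #[derive(Clone, Copy, PartialEq, Debug)]
    enum Ty { Unknown, Fixnum }

    #[derive(Clone, Copy)]
    enum Mapping { ToStack(Ty), ToSelf, ToLocal(u8) }

    // Detach every stack slot that still points at `local_idx`, snapshotting the
    // local's current type so later writes to the local can't change what we
    // already know about the value sitting on the stack.
    fn detach_local(temps: &mut [Mapping], locals: &[Ty], local_idx: u8) {
        for mapping in temps.iter_mut() {
            if let Mapping::ToLocal(idx) = *mapping {
                if idx == local_idx {
                    *mapping = Mapping::ToStack(locals[idx as usize]);
                }
            }
        }
    }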
- for (i, mapping) in self.temp_mapping.iter_mut().enumerate() { - *mapping = match *mapping { - MapToStack => MapToStack, - MapToSelf => MapToSelf, - MapToLocal(idx) => { - self.temp_types[i] = self.local_types[idx as usize]; - MapToStack - } + + for mapping_idx in 0..MAX_CTX_TEMPS { + let mapping = self.get_temp_mapping(mapping_idx); + if let MapToLocal(local_idx) = mapping { + let local_idx = local_idx as usize; + self.set_temp_mapping(mapping_idx, TempMapping::MapToStack(self.get_local_type(local_idx))); } } // Clear the local types - self.local_types = [Type::default(); MAX_LOCAL_TYPES]; + self.local_types = [Type::default(); MAX_CTX_LOCALS]; + } + + /// Return true if the code is inlined by the caller + pub fn inline(&self) -> bool { + self.inline_block.is_some() + } + + /// Set a block ISEQ given to the Block of this Context + pub fn set_inline_block(&mut self, iseq: IseqPtr) { + self.inline_block = Some(iseq); } /// Compute a difference score for two context objects - /// Returns 0 if the two contexts are the same - /// Returns > 0 if different but compatible - /// Returns usize::MAX if incompatible - pub fn diff(&self, dst: &Context) -> usize { + pub fn diff(&self, dst: &Context) -> TypeDiff { // Self is the source context (at the end of the predecessor) let src = self; // Can only lookup the first version in the chain - if dst.chain_depth != 0 { - return usize::MAX; + if dst.get_chain_depth() != 0 { + return TypeDiff::Incompatible; } // Blocks with depth > 0 always produce new versions // Sidechains cannot overlap - if src.chain_depth != 0 { - return usize::MAX; + if src.get_chain_depth() != 0 { + return TypeDiff::Incompatible; + } + + if src.is_return_landing() != dst.is_return_landing() { + return TypeDiff::Incompatible; + } + + if src.is_deferred() != dst.is_deferred() { + return TypeDiff::Incompatible; } if dst.stack_size != src.stack_size { - return usize::MAX; + return TypeDiff::Incompatible; } if dst.sp_offset != src.sp_offset { - return usize::MAX; + return TypeDiff::Incompatible; + } + + if dst.reg_mapping != src.reg_mapping { + return TypeDiff::Incompatible; } // Difference sum let mut diff = 0; // Check the type of self - let self_diff = src.self_type.diff(dst.self_type); + diff += match src.self_type.diff(dst.self_type) { + TypeDiff::Compatible(diff) => diff, + TypeDiff::Incompatible => return TypeDiff::Incompatible, + }; - if self_diff == usize::MAX { - return usize::MAX; + // Check the block to inline + if src.inline_block != dst.inline_block { + // find_block_version should not find existing blocks with different + // inline_block so that their yield will not be megamorphic. + return TypeDiff::Incompatible; } - diff += self_diff; - // For each local type we track - for i in 0..src.local_types.len() { - let t_src = src.local_types[i]; - let t_dst = dst.local_types[i]; - let temp_diff = t_src.diff(t_dst); - - if temp_diff == usize::MAX { - return usize::MAX; - } - - diff += temp_diff; + for i in 0.. 
MAX_CTX_LOCALS { + let t_src = src.get_local_type(i); + let t_dst = dst.get_local_type(i); + diff += match t_src.diff(t_dst) { + TypeDiff::Compatible(diff) => diff, + TypeDiff::Incompatible => return TypeDiff::Incompatible, + }; } // For each value on the temp stack for i in 0..src.stack_size { - let (src_mapping, src_type) = src.get_opnd_mapping(StackOpnd(i)); - let (dst_mapping, dst_type) = dst.get_opnd_mapping(StackOpnd(i)); + let src_mapping = src.get_opnd_mapping(StackOpnd(i)); + let dst_mapping = dst.get_opnd_mapping(StackOpnd(i)); // If the two mappings aren't the same if src_mapping != dst_mapping { - if dst_mapping == MapToStack { + if matches!(dst_mapping, MapToStack(_)) { // We can safely drop information about the source of the temp // stack operand. diff += 1; } else { - return usize::MAX; + return TypeDiff::Incompatible; } } - let temp_diff = src_type.diff(dst_type); + let src_type = src.get_opnd_type(StackOpnd(i)); + let dst_type = dst.get_opnd_type(StackOpnd(i)); + + diff += match src_type.diff(dst_type) { + TypeDiff::Compatible(diff) => diff, + TypeDiff::Incompatible => return TypeDiff::Incompatible, + }; + } - if temp_diff == usize::MAX { - return usize::MAX; + return TypeDiff::Compatible(diff); + } + + /// Basically diff() but allows RegMapping incompatibility that could be fixed by + /// spilling, loading, or shuffling registers. + pub fn diff_allowing_reg_mismatch(&self, dst: &Context) -> TypeDiff { + // We shuffle only RegOpnd::Local and spill any other RegOpnd::Stack. + // If dst has RegOpnd::Stack, we can't reuse the block as a callee. + for reg_opnd in dst.get_reg_mapping().get_reg_opnds() { + if matches!(reg_opnd, RegOpnd::Stack(_)) { + return TypeDiff::Incompatible; } + } - diff += temp_diff; + // Prepare a Context with the same registers + let mut dst_with_same_regs = dst.clone(); + dst_with_same_regs.set_reg_mapping(self.get_reg_mapping()); + + // Diff registers and other stuff separately, and merge them + if let TypeDiff::Compatible(ctx_diff) = self.diff(&dst_with_same_regs) { + TypeDiff::Compatible(ctx_diff + self.get_reg_mapping().diff(dst.get_reg_mapping())) + } else { + TypeDiff::Incompatible + } + } + + pub fn two_fixnums_on_stack(&self, jit: &mut JITState) -> Option<bool> { + if jit.at_compile_target() { + let comptime_recv = jit.peek_at_stack(self, 1); + let comptime_arg = jit.peek_at_stack(self, 0); + return Some(comptime_recv.fixnum_p() && comptime_arg.fixnum_p()); } - return diff; + let recv_type = self.get_opnd_type(StackOpnd(1)); + let arg_type = self.get_opnd_type(StackOpnd(0)); + match (recv_type, arg_type) { + (Type::Fixnum, Type::Fixnum) => Some(true), + (Type::Unknown | Type::UnknownImm, Type::Unknown | Type::UnknownImm) => None, + _ => Some(false), + } + } +} + +impl Assembler { + /// Push one new value on the temp stack with an explicit mapping + /// Return a pointer to the new stack top + pub fn stack_push_mapping(&mut self, mapping: TempMapping) -> Opnd { + // If type propagation is disabled, store no types + if get_option!(no_type_prop) { + return self.stack_push_mapping(mapping.without_type()); + } + + let stack_size: usize = self.ctx.stack_size.into(); + + // Keep track of the type and mapping of the value + if stack_size < MAX_CTX_TEMPS { + self.ctx.set_temp_mapping(stack_size, mapping); + + if let MapToLocal(local_idx) = mapping { + assert!((local_idx as usize) < MAX_CTX_LOCALS); + } + } + + self.ctx.stack_size += 1; + self.ctx.sp_offset += 1; + + // Allocate a register to the new stack operand + let stack_opnd = self.stack_opnd(0); + 
self.alloc_reg(stack_opnd.reg_opnd()); + + stack_opnd + } + + /// Push one new value on the temp stack + /// Return a pointer to the new stack top + pub fn stack_push(&mut self, val_type: Type) -> Opnd { + return self.stack_push_mapping(TempMapping::MapToStack(val_type)); + } + + /// Push the self value on the stack + pub fn stack_push_self(&mut self) -> Opnd { + return self.stack_push_mapping(TempMapping::MapToSelf); + } + + /// Push a local variable on the stack + pub fn stack_push_local(&mut self, local_idx: usize) -> Opnd { + if local_idx >= MAX_CTX_LOCALS { + return self.stack_push(Type::Unknown); + } + + return self.stack_push_mapping(TempMapping::MapToLocal(local_idx as u8)); + } + + // Pop N values off the stack + // Return a pointer to the stack top before the pop operation + pub fn stack_pop(&mut self, n: usize) -> Opnd { + assert!(n <= self.ctx.stack_size.into()); + + let top = self.stack_opnd(0); + + // Clear the types of the popped values + for i in 0..n { + let idx: usize = (self.ctx.stack_size as usize) - i - 1; + + if idx < MAX_CTX_TEMPS { + self.ctx.set_temp_mapping(idx, TempMapping::MapToStack(Type::Unknown)); + } + } + + self.ctx.stack_size -= n as u8; + self.ctx.sp_offset -= n as i8; + + return top; + } + + /// Shift stack temps to remove a Symbol for #send. + pub fn shift_stack(&mut self, argc: usize) { + assert!(argc < self.ctx.stack_size.into()); + + let method_name_index = (self.ctx.stack_size as usize) - argc - 1; + + for i in method_name_index..(self.ctx.stack_size - 1) as usize { + if i < MAX_CTX_TEMPS { + let next_arg_mapping = if i + 1 < MAX_CTX_TEMPS { + self.ctx.get_temp_mapping(i + 1) + } else { + TempMapping::MapToStack(Type::Unknown) + }; + self.ctx.set_temp_mapping(i, next_arg_mapping); + } + } + self.stack_pop(1); + } + + /// Get an operand pointing to a slot on the temp stack + pub fn stack_opnd(&self, idx: i32) -> Opnd { + Opnd::Stack { + idx, + num_bits: 64, + stack_size: self.ctx.stack_size, + num_locals: None, // not needed for stack temps + sp_offset: self.ctx.sp_offset, + reg_mapping: None, // push_insn will set this + } + } + + /// Get an operand pointing to a local variable + pub fn local_opnd(&self, ep_offset: u32) -> Opnd { + let idx = self.ctx.stack_size as i32 + ep_offset as i32; + Opnd::Stack { + idx, + num_bits: 64, + stack_size: self.ctx.stack_size, + num_locals: Some(self.get_num_locals().unwrap()), // this must exist for locals + sp_offset: self.ctx.sp_offset, + reg_mapping: None, // push_insn will set this + } } } @@ -1479,7 +3072,7 @@ impl BlockId { #[cfg(debug_assertions)] #[allow(dead_code)] pub fn dump_src_loc(&self) { - unsafe { rb_yjit_dump_iseq_loc(self.iseq, self.idx) } + unsafe { rb_yjit_dump_iseq_loc(self.iseq, self.idx as u32) } } } @@ -1513,51 +3106,57 @@ fn gen_block_series_body( let mut batch = Vec::with_capacity(EXPECTED_BATCH_SIZE); // Generate code for the first block - let first_block = gen_single_block(blockid, start_ctx, ec, cb, ocb).ok()?; - batch.push(first_block.clone()); // Keep track of this block version + let first_block = gen_single_block(blockid, start_ctx, ec, cb, ocb, true).ok()?; + batch.push(first_block); // Keep track of this block version // Add the block version to the VersionMap for this ISEQ - add_block_version(&first_block, cb); + unsafe { add_block_version(first_block, cb) }; // Loop variable - let mut last_blockref = first_block.clone(); + let mut last_blockref = first_block; loop { // Get the last outgoing branch from the previous block. 
- let last_branchref = { - let last_block = last_blockref.borrow(); - match last_block.outgoing.last() { - Some(branch) => branch.clone(), + // SAFETY: No cell mutation inside unsafe. Copying out a BranchRef. + let last_branchref: BranchRef = unsafe { + let last_block = last_blockref.as_ref(); + match last_block.outgoing.0.ref_unchecked().last() { + Some(branch) => *branch, None => { break; } // If last block has no branches, stop. } }; - let mut last_branch = last_branchref.borrow_mut(); + let last_branch = unsafe { last_branchref.as_ref() }; + + incr_counter!(block_next_count); // gen_direct_jump() can request a block to be placed immediately after by // leaving a single target that has a `None` address. - let last_target = match &mut last_branch.targets { - [Some(last_target), None] if last_target.get_address().is_none() => last_target, - _ => break + // SAFETY: no mutation inside the unsafe block + let (requested_blockid, requested_ctx) = unsafe { + match (last_branch.targets[0].ref_unchecked(), last_branch.targets[1].ref_unchecked()) { + (Some(last_target), None) if last_target.get_address().is_none() => { + (last_target.get_blockid(), last_target.get_ctx()) + } + _ => { + // We're done when no fallthrough block is requested + break; + } + } }; - incr_counter!(block_next_count); - - // Get id and context for the new block - let requested_blockid = last_target.get_blockid(); - let requested_ctx = last_target.get_ctx(); - // Generate new block using context from the last branch. - let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb); + let requested_ctx = Context::decode(requested_ctx); + let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb, false); // If the block failed to compile if result.is_err() { // Remove previously compiled block // versions from the version map - mem::drop(last_branch); // end borrow - for blockref in &batch { - free_block(blockref); - remove_block_version(blockref); + for blockref in batch { + remove_block_version(&blockref); + // SAFETY: block was well connected because it was in a version_map + unsafe { free_block(blockref, false) }; } // Stop compiling @@ -1567,16 +3166,14 @@ fn gen_block_series_body( let new_blockref = result.unwrap(); // Add the block version to the VersionMap for this ISEQ - add_block_version(&new_blockref, cb); + unsafe { add_block_version(new_blockref, cb) }; // Connect the last branch and the new block - last_branch.targets[0] = Some(Box::new(BranchTarget::Block(new_blockref.clone()))); - new_blockref - .borrow_mut() - .push_incoming(last_branchref.clone()); + last_branch.targets[0].set(Some(Box::new(BranchTarget::Block(new_blockref)))); + unsafe { new_blockref.as_ref().incoming.push(last_branchref) }; // Track the block - batch.push(new_blockref.clone()); + batch.push(new_blockref); // Repeat with newest block last_blockref = new_blockref; @@ -1587,12 +3184,12 @@ fn gen_block_series_body( // If dump_iseq_disasm is active, see if this iseq's location matches the given substring. // If so, we print the new blocks to the console. 
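The compilation loop above can be summarized with a minimal sketch. The closure and struct below are hypothetical stand-ins for YJIT's Block/Branch machinery: keep compiling blocks while the newest block's last outgoing branch leaves a single target whose address is still None, i.e. a request to place the next block immediately after it.

    struct FallthroughRequest { address: Option<usize>, block_id: usize }

    // Compile a series of blocks; `compile` returns the new block plus an optional
    // fallthrough request taken from its last outgoing branch.
    fn compile_series(
        first_id: usize,
        mut compile: impl FnMut(usize) -> Option<(usize, Option<FallthroughRequest>)>,
    ) -> Vec<usize> {
        let mut batch = Vec::new();
        let mut next = Some(FallthroughRequest { address: None, block_id: first_id });
        while let Some(request) = next.take() {
            match compile(request.block_id) {
                Some((block, fallthrough)) => {
                    batch.push(block);
                    // Continue only while a fallthrough block is requested.
                    next = fallthrough.filter(|req| req.address.is_none());
                }
                None => break, // compilation failed; the caller rolls `batch` back
            }
        }
        batch
    }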
if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() { - let blockid_idx = blockid.idx; - let iseq_location = iseq_get_location(blockid.iseq, blockid_idx); + let iseq_location = iseq_get_location(blockid.iseq, blockid.idx); if iseq_location.contains(substr) { - let last_block = last_blockref.borrow(); - println!("Compiling {} block(s) for {}, ISEQ offsets [{}, {})", batch.len(), iseq_location, blockid_idx, last_block.end_idx); - print!("{}", disasm_iseq_insn_range(blockid.iseq, blockid.idx, last_block.end_idx)); + let last_block = unsafe { last_blockref.as_ref() }; + let iseq_range = &last_block.iseq_range; + println!("Compiling {} block(s) for {}, ISEQ offsets [{}, {})", batch.len(), iseq_location, iseq_range.start, iseq_range.end); + print!("{}", disasm_iseq_insn_range(blockid.iseq, iseq_range.start, iseq_range.end)); } } } @@ -1602,12 +3199,17 @@ fn gen_block_series_body( /// Generate a block version that is an entry point inserted into an iseq /// NOTE: this function assumes that the VM lock has been taken -pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> { +/// If jit_exception is true, compile JIT code for handling exceptions. +/// See jit_compile_exception() for details. +pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> Option<*const u8> { // Compute the current instruction index based on the current PC - let insn_idx: u32 = unsafe { - let pc_zero = rb_iseq_pc_at_idx(iseq, 0); - let ec_pc = get_cfp_pc(get_ec_cfp(ec)); - ec_pc.offset_from(pc_zero).try_into().ok()? + let cfp = unsafe { get_ec_cfp(ec) }; + let insn_idx: u16 = unsafe { + let ec_pc = get_cfp_pc(cfp); + iseq_pc_to_insn_idx(iseq, ec_pc)? + }; + let stack_size: u8 = unsafe { + u8::try_from(get_cfp_sp(cfp).offset_from(get_cfp_bp(cfp))).ok()? }; // The entry context makes no assumptions about types @@ -1620,77 +3222,247 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> { let cb = CodegenGlobals::get_inline_cb(); let ocb = CodegenGlobals::get_outlined_cb(); - // Write the interpreter entry prologue. Might be NULL when out of memory. - let code_ptr = gen_entry_prologue(cb, iseq, insn_idx); - - // Try to generate code for the entry block - let block = gen_block_series(blockid, &Context::default(), ec, cb, ocb); + let code_ptr = gen_entry_point_body(blockid, stack_size, ec, jit_exception, cb, ocb); cb.mark_all_executable(); ocb.unwrap().mark_all_executable(); + code_ptr +} + +fn gen_entry_point_body(blockid: BlockId, stack_size: u8, ec: EcPtr, jit_exception: bool, cb: &mut CodeBlock, ocb: &mut OutlinedCb) -> Option<*const u8> { + // Write the interpreter entry prologue. Might be NULL when out of memory. + let (code_ptr, reg_mapping) = gen_entry_prologue(cb, ocb, blockid, stack_size, jit_exception)?; + + // Find or compile a block version + let mut ctx = Context::default(); + ctx.stack_size = stack_size; + ctx.reg_mapping = reg_mapping; + let block = match find_block_version(blockid, &ctx) { + // If an existing block is found, generate a jump to the block. + Some(blockref) => { + let mut asm = Assembler::new_without_iseq(); + asm.jmp(unsafe { blockref.as_ref() }.start_addr.into()); + asm.compile(cb, Some(ocb))?; + Some(blockref) + } + // If this block hasn't yet been compiled, generate blocks after the entry guard. + None => gen_block_series(blockid, &ctx, ec, cb, ocb), + }; + match block { // Compilation failed None => { // Trigger code GC. This entry point will be recompiled later. 
- cb.code_gc(); + if get_option!(code_gc) { + cb.code_gc(ocb); + } return None; } // If the block contains no Ruby instructions Some(block) => { - let block = block.borrow(); - if block.end_idx == insn_idx { + let block = unsafe { block.as_ref() }; + if block.iseq_range.is_empty() { return None; } } } + // Count the number of entry points we compile + incr_counter!(compiled_iseq_entry); + // Compilation successful and block not empty - return code_ptr; + Some(code_ptr.raw_ptr(cb)) } -/// Generate code for a branch, possibly rewriting and changing the size of it -fn regenerate_branch(cb: &mut CodeBlock, branch: &mut Branch) { - // FIXME - /* - if (branch->start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) { - // Generating this branch would modify frozen bytes. Do nothing. - return; +// Change the entry's jump target from an entry stub to a next entry +pub fn regenerate_entry(cb: &mut CodeBlock, entryref: &EntryRef, next_entry: CodePtr) { + let mut asm = Assembler::new_without_iseq(); + asm_comment!(asm, "regenerate_entry"); + + // gen_entry_guard generates cmp + jne. We're rewriting only jne. + asm.jne(next_entry.into()); + + // Move write_pos to rewrite the entry + let old_write_pos = cb.get_write_pos(); + let old_dropped_bytes = cb.has_dropped_bytes(); + cb.set_write_ptr(unsafe { entryref.as_ref() }.start_addr); + cb.set_dropped_bytes(false); + asm.compile(cb, None).expect("can rewrite existing code"); + + // Rewind write_pos to the original one + assert_eq!(cb.get_write_ptr(), unsafe { entryref.as_ref() }.end_addr); + cb.set_pos(old_write_pos); + cb.set_dropped_bytes(old_dropped_bytes); +} + +pub type PendingEntryRef = Rc<PendingEntry>; + +/// Create a new entry reference for an ISEQ +pub fn new_pending_entry() -> PendingEntryRef { + let entry = PendingEntry { + uninit_entry: Box::new(MaybeUninit::uninit()), + start_addr: Cell::new(None), + end_addr: Cell::new(None), + }; + return Rc::new(entry); +} + +c_callable! { + /// Generated code calls this function with the SysV calling convention. + /// See [gen_entry_stub]. + fn entry_stub_hit(entry_ptr: *const c_void, ec: EcPtr) -> *const u8 { + with_compile_time(|| { + with_vm_lock(src_loc!(), || { + let cb = CodegenGlobals::get_inline_cb(); + let ocb = CodegenGlobals::get_outlined_cb(); + + let addr = entry_stub_hit_body(entry_ptr, ec, cb, ocb) + .unwrap_or_else(|| { + // Trigger code GC (e.g. no space). + // This entry point will be recompiled later. + if get_option!(code_gc) { + cb.code_gc(ocb); + } + CodegenGlobals::get_stub_exit_code().raw_ptr(cb) + }); + + cb.mark_all_executable(); + ocb.unwrap().mark_all_executable(); + + addr + }) + }) } - */ +} - // Remove old comments - if let (Some(start_addr), Some(end_addr)) = (branch.start_addr, branch.end_addr) { - cb.remove_comments(start_addr, end_addr) +/// Called by the generated code when an entry stub is executed +fn entry_stub_hit_body( + entry_ptr: *const c_void, + ec: EcPtr, + cb: &mut CodeBlock, + ocb: &mut OutlinedCb +) -> Option<*const u8> { + // Get ISEQ and insn_idx from the current ec->cfp + let cfp = unsafe { get_ec_cfp(ec) }; + let iseq = unsafe { get_cfp_iseq(cfp) }; + let insn_idx = iseq_pc_to_insn_idx(iseq, unsafe { get_cfp_pc(cfp) })?; + let blockid = BlockId { iseq, idx: insn_idx }; + let stack_size: u8 = unsafe { + u8::try_from(get_cfp_sp(cfp).offset_from(get_cfp_bp(cfp))).ok()? 
+ }; + + // Compile a new entry guard as a next entry + let next_entry = cb.get_write_ptr(); + let mut asm = Assembler::new(unsafe { get_iseq_body_local_table_size(iseq) }); + let pending_entry = gen_entry_chain_guard(&mut asm, ocb, blockid)?; + let reg_mapping = gen_entry_reg_mapping(&mut asm, blockid, stack_size); + asm.compile(cb, Some(ocb))?; + + // Find or compile a block version + let mut ctx = Context::default(); + ctx.stack_size = stack_size; + ctx.reg_mapping = reg_mapping; + let blockref = match find_block_version(blockid, &ctx) { + // If an existing block is found, generate a jump to the block. + Some(blockref) => { + let mut asm = Assembler::new_without_iseq(); + asm.jmp(unsafe { blockref.as_ref() }.start_addr.into()); + asm.compile(cb, Some(ocb))?; + Some(blockref) + } + // If this block hasn't yet been compiled, generate blocks after the entry guard. + None => gen_block_series(blockid, &ctx, ec, cb, ocb), + }; + + // Commit or retry the entry + if blockref.is_some() { + // Regenerate the previous entry + let entryref = NonNull::<Entry>::new(entry_ptr as *mut Entry).expect("Entry should not be null"); + regenerate_entry(cb, &entryref, next_entry); + + // Write an entry to the heap and push it to the ISEQ + let pending_entry = Rc::try_unwrap(pending_entry).ok().expect("PendingEntry should be unique"); + get_or_create_iseq_payload(iseq).entries.push(pending_entry.into_entry()); } - let branch_terminates_block = branch.end_addr == branch.block.borrow().end_addr; + // Return a code pointer if the block is successfully compiled. The entry stub needs + // to jump to the entry preceding the block to load the registers in reg_mapping. + blockref.map(|_block| next_entry.raw_ptr(cb)) +} + +/// Generate a stub that calls entry_stub_hit +pub fn gen_entry_stub(entry_address: usize, ocb: &mut OutlinedCb) -> Option<CodePtr> { + let ocb = ocb.unwrap(); + + let mut asm = Assembler::new_without_iseq(); + asm_comment!(asm, "entry stub hit"); + + asm.mov(C_ARG_OPNDS[0], entry_address.into()); + + // Jump to trampoline to call entry_stub_hit() + // Not really a side exit, just don't need a padded jump here. + asm.jmp(CodegenGlobals::get_entry_stub_hit_trampoline().as_side_exit()); + + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) +} + +/// A trampoline used by gen_entry_stub. entry_stub_hit may issue Code GC, so +/// it's useful for Code GC to call entry_stub_hit from a globally shared code. +pub fn gen_entry_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> { + let ocb = ocb.unwrap(); + let mut asm = Assembler::new_without_iseq(); + + // See gen_entry_guard for how it's used. + asm_comment!(asm, "entry_stub_hit() trampoline"); + let jump_addr = asm.ccall(entry_stub_hit as *mut u8, vec![C_ARG_OPNDS[0], EC]); + + // Jump to the address returned by the entry_stub_hit() call + asm.jmp_opnd(jump_addr); + + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) +} + +/// Generate code for a branch, possibly rewriting and changing the size of it +fn regenerate_branch(cb: &mut CodeBlock, branch: &Branch) { + // Remove old comments + cb.remove_comments(branch.start_addr, branch.end_addr.get()); + + // SAFETY: having a &Branch implies branch.block is initialized. 
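regenerate_entry() above and the rest of regenerate_branch() below share one patching idiom. A toy sketch, using a hypothetical CodeBuffer rather than YJIT's CodeBlock: save the write cursor, point it at the old code, emit the replacement over it, then restore the cursor so normal emission resumes where it left off.

    struct CodeBuffer {
        bytes: Vec<u8>,
        write_pos: usize,
    }

    impl CodeBuffer {
        fn emit(&mut self, code: &[u8]) {
            for &byte in code {
                if self.write_pos < self.bytes.len() {
                    self.bytes[self.write_pos] = byte; // overwrite existing code
                } else {
                    self.bytes.push(byte); // append new code at the end
                }
                self.write_pos += 1;
            }
        }

        fn rewrite_at(&mut self, pos: usize, code: &[u8]) {
            let saved_pos = self.write_pos; // remember the current cursor
            self.write_pos = pos;           // jump back to the patch site
            self.emit(code);                // the patch must not grow past the old code
            self.write_pos = saved_pos;     // resume emitting at the original position
        }
    }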
+ let block = unsafe { branch.block.get().as_ref() }; + + let branch_terminates_block = branch.end_addr.get() == block.get_end_addr(); // Generate the branch - let mut asm = Assembler::new(); - asm.comment("regenerate_branch"); - (branch.gen_fn)( + let mut asm = Assembler::new_without_iseq(); + asm_comment!(asm, "regenerate_branch"); + branch.gen_fn.call( &mut asm, - branch.get_target_address(0).unwrap(), - branch.get_target_address(1), - branch.shape, + Target::CodePtr(branch.get_target_address(0).unwrap()), + branch.get_target_address(1).map(|addr| Target::CodePtr(addr)), ); + // If the entire block is the branch and the block could be invalidated, + // we need to pad to ensure there is room for invalidation patching. + if branch.start_addr == block.start_addr && branch_terminates_block && block.entry_exit.is_some() { + asm.pad_inval_patch(); + } + // Rewrite the branch let old_write_pos = cb.get_write_pos(); let old_dropped_bytes = cb.has_dropped_bytes(); - cb.set_write_ptr(branch.start_addr.unwrap()); + cb.set_write_ptr(branch.start_addr); cb.set_dropped_bytes(false); - asm.compile(cb); + asm.compile(cb, None).expect("can rewrite existing code"); + let new_end_addr = cb.get_write_ptr(); - branch.end_addr = Some(cb.get_write_ptr()); + branch.end_addr.set(new_end_addr); // The block may have shrunk after the branch is rewritten - let mut block = branch.block.borrow_mut(); if branch_terminates_block { // Adjust block size - block.end_addr = branch.end_addr; + block.end_addr.set(new_end_addr); } // cb.write_pos is both a write cursor and a marker for the end of @@ -1708,46 +3480,38 @@ fn regenerate_branch(cb: &mut CodeBlock, branch: &mut Branch) { // The branch sits at the end of cb and consumed some memory. // Keep cb.write_pos. } -} -/// Create a new outgoing branch entry for a block -fn make_branch_entry(block: &BlockRef, gen_fn: BranchGenFn) -> BranchRef { - let branch = Branch { - // Block this is attached to - block: block.clone(), - - // Positions where the generated code starts and ends - start_addr: None, - end_addr: None, - - // Branch target blocks and their contexts - targets: [None, None], + branch.assert_layout(); +} - // Branch code generation function - gen_fn: gen_fn, +pub type PendingBranchRef = Rc<PendingBranch>; - // Shape of the branch - shape: BranchShape::Default, - }; +/// Create a new outgoing branch entry for a block +fn new_pending_branch(jit: &mut JITState, gen_fn: BranchGenFn) -> PendingBranchRef { + let branch = Rc::new(PendingBranch { + uninit_branch: Box::new(MaybeUninit::uninit()), + gen_fn, + start_addr: Cell::new(None), + end_addr: Cell::new(None), + targets: [Cell::new(None), Cell::new(None)], + }); // Add to the list of outgoing branches for the block - let branchref = Rc::new(RefCell::new(branch)); - block.borrow_mut().push_outgoing(branchref.clone()); - incr_counter!(compiled_branch_count); + jit.queue_outgoing_branch(branch.clone()); - return branchref; + branch } c_callable! { /// Generated code calls this function with the SysV calling convention. - /// See [set_branch_target]. + /// See [gen_branch_stub]. 
fn branch_stub_hit( branch_ptr: *const c_void, target_idx: u32, ec: EcPtr, ) -> *const u8 { with_vm_lock(src_loc!(), || { - branch_stub_hit_body(branch_ptr, target_idx, ec) + with_compile_time(|| { branch_stub_hit_body(branch_ptr, target_idx, ec) }) }) } } @@ -1759,24 +3523,18 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) - println!("branch_stub_hit"); } - assert!(!branch_ptr.is_null()); - - //branch_ptr is actually: - //branch_ptr: *const RefCell<Branch> - let branch_rc = unsafe { BranchRef::from_raw(branch_ptr as *const RefCell<Branch>) }; + let branch_ref = NonNull::<Branch>::new(branch_ptr as *mut Branch) + .expect("Branches should not be null"); - // We increment the strong count because we want to keep the reference owned - // by the branch stub alive. Return branch stubs can be hit multiple times. - unsafe { Rc::increment_strong_count(branch_ptr) }; - - let mut branch = branch_rc.borrow_mut(); + // SAFETY: We have the VM lock, and the branch is initialized by the time generated + // code calls this function. + // + // Careful, don't make a `&Block` from `branch.block` here because we might + // delete it later in delete_empty_defer_block(). + let branch = unsafe { branch_ref.as_ref() }; let branch_size_on_entry = branch.code_size(); let target_idx: usize = target_idx.as_usize(); - let target = branch.targets[target_idx].as_ref().unwrap(); - let target_blockid = target.get_blockid(); - let target_ctx = target.get_ctx(); - let target_branch_shape = match target_idx { 0 => BranchShape::Next0, 1 => BranchShape::Next1, @@ -1786,22 +3544,33 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) - let cb = CodegenGlobals::get_inline_cb(); let ocb = CodegenGlobals::get_outlined_cb(); - // If this branch has already been patched, return the dst address - // Note: ractors can cause the same stub to be hit multiple times - if let BranchTarget::Block(_) = target.as_ref() { - return target.get_address().unwrap().raw_ptr(); - } + let (target_blockid, target_ctx): (BlockId, Context) = unsafe { + // SAFETY: no mutation of the target's Cell. Just reading out data. + let target = branch.targets[target_idx].ref_unchecked().as_ref().unwrap(); + + // If this branch has already been patched, return the dst address + // Note: recursion can cause the same stub to be hit multiple times + if let BranchTarget::Block(_) = target.as_ref() { + return target.get_address().unwrap().raw_ptr(cb); + } + + let target_ctx = Context::decode(target.get_ctx()); + (target.get_blockid(), target_ctx) + }; let (cfp, original_interp_sp) = unsafe { let cfp = get_ec_cfp(ec); let original_interp_sp = get_cfp_sp(cfp); - let running_iseq = rb_cfp_get_iseq(cfp); - let reconned_pc = rb_iseq_pc_at_idx(running_iseq, target_blockid.idx); - let reconned_sp = original_interp_sp.offset(target_ctx.sp_offset.into()); - + let running_iseq = get_cfp_iseq(cfp); assert_eq!(running_iseq, target_blockid.iseq as _, "each stub expects a particular iseq"); + let reconned_pc = rb_iseq_pc_at_idx(running_iseq, target_blockid.idx.into()); + let reconned_sp = original_interp_sp.offset(target_ctx.sp_offset.into()); + // Unlike in the interpreter, our `leave` doesn't write to the caller's + // SP -- we do it in the returned-to code. Account for this difference. 
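The adjustment described above and applied on the next line reduces to a small calculation. A worked sketch in VALUE-slot units, using indices instead of raw pointers purely for illustration:

    // sp_offset is the branch target's offset from the interpreter SP, and a
    // return landing adds one slot for the return value that only JIT code has
    // produced so far.
    fn reconstructed_sp_index(interp_sp_idx: isize, sp_offset: i8, is_return_landing: bool) -> isize {
        interp_sp_idx + sp_offset as isize + is_return_landing as isize
    }

    // For example, sp_offset = 2 at a return landing reconstructs SP three slots
    // above the interpreter's current SP.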
+ let reconned_sp = reconned_sp.add(target_ctx.is_return_landing().into()); + // Update the PC in the current CFP, because it may be out of sync in JITted code rb_set_cfp_pc(cfp, reconned_pc); @@ -1813,83 +3582,97 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) - // So we do it here instead. rb_set_cfp_sp(cfp, reconned_sp); + // Bail if code GC is disabled and we've already run out of spaces. + if !get_option!(code_gc) && (cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes()) { + return CodegenGlobals::get_stub_exit_code().raw_ptr(cb); + } + + // Bail if we're about to run out of native stack space. + // We've just reconstructed interpreter state. + if rb_ec_stack_check(ec as _) != 0 { + return CodegenGlobals::get_stub_exit_code().raw_ptr(cb); + } + + // Bail if this branch is housed in an invalidated (dead) block. + // This only happens in rare invalidation scenarios and we need + // to avoid linking a dead block to a live block with a branch. + if branch.block.get().as_ref().iseq.get().is_null() { + return CodegenGlobals::get_stub_exit_code().raw_ptr(cb); + } + (cfp, original_interp_sp) }; // Try to find an existing compiled version of this block let mut block = find_block_version(target_blockid, &target_ctx); - + let mut branch_modified = false; // If this block hasn't yet been compiled if block.is_none() { - let branch_old_shape = branch.shape; - let mut branch_modified = false; + let branch_old_shape = branch.gen_fn.get_shape(); // If the new block can be generated right after the branch (at cb->write_pos) - if Some(cb.get_write_ptr()) == branch.end_addr { + if cb.get_write_ptr() == branch.end_addr.get() { // This branch should be terminating its block - assert!(branch.end_addr == branch.block.borrow().end_addr); + assert!(branch.end_addr == unsafe { branch.block.get().as_ref() }.end_addr); // Change the branch shape to indicate the target block will be placed next - branch.shape = target_branch_shape; + branch.gen_fn.set_shape(target_branch_shape); // Rewrite the branch with the new, potentially more compact shape - regenerate_branch(cb, &mut branch); + regenerate_branch(cb, branch); branch_modified = true; // Ensure that the branch terminates the codeblock just like // before entering this if block. This drops bytes off the end // in case we shrank the branch when regenerating. - cb.set_write_ptr(branch.end_addr.unwrap()); + cb.set_write_ptr(branch.end_addr.get()); } // Compile the new block version - drop(branch); // Stop mutable RefCell borrow since GC might borrow branch for marking block = gen_block_series(target_blockid, &target_ctx, ec, cb, ocb); - branch = branch_rc.borrow_mut(); if block.is_none() && branch_modified { // We couldn't generate a new block for the branch, but we modified the branch. // Restore the branch by regenerating it. 
- branch.shape = branch_old_shape; - regenerate_branch(cb, &mut branch); + branch.gen_fn.set_shape(branch_old_shape); + regenerate_branch(cb, branch); } } // Finish building the new block let dst_addr = match block { - Some(block_rc) => { - let mut block: RefMut<_> = block_rc.borrow_mut(); + Some(new_block) => { + let new_block = unsafe { new_block.as_ref() }; // Branch shape should reflect layout - assert!(!(branch.shape == target_branch_shape && block.start_addr != branch.end_addr)); + assert!(!(branch.gen_fn.get_shape() == target_branch_shape && new_block.start_addr != branch.end_addr.get())); + + // When block housing this branch is empty, try to free it + delete_empty_defer_block(branch, new_block, target_ctx, target_blockid); // Add this branch to the list of incoming branches for the target - block.push_incoming(branch_rc.clone()); - mem::drop(block); // end mut borrow + new_block.push_incoming(branch_ref); // Update the branch target address - branch.targets[target_idx] = Some(Box::new(BranchTarget::Block(block_rc.clone()))); + branch.targets[target_idx].set(Some(Box::new(BranchTarget::Block(new_block.into())))); // Rewrite the branch with the new jump target address - regenerate_branch(cb, &mut branch); + regenerate_branch(cb, branch); // Restore interpreter sp, since the code hitting the stub expects the original. unsafe { rb_set_cfp_sp(cfp, original_interp_sp) }; - block_rc.borrow().start_addr.unwrap() + new_block.start_addr } None => { - // Code GC needs to borrow blocks for invalidation, so their mutable - // borrows must be dropped first. - drop(block); - drop(branch); // Trigger code GC. The whole ISEQ will be recompiled later. // We shouldn't trigger it in the middle of compilation in branch_stub_hit // because incomplete code could be used when cb.dropped_bytes is flipped // by code GC. So this place, after all compilation, is the safest place // to hook code GC on branch_stub_hit. - cb.code_gc(); - branch = branch_rc.borrow_mut(); + if get_option!(code_gc) { + cb.code_gc(ocb); + } // Failed to service the stub by generating a new block so now we // need to exit to the interpreter at the stubbed location. We are @@ -1909,88 +3692,120 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) - assert!( new_branch_size <= branch_size_on_entry, "branch stubs should never enlarge branches (start_addr: {:?}, old_size: {}, new_size: {})", - branch.start_addr.unwrap().raw_ptr(), branch_size_on_entry, new_branch_size, + branch.start_addr.raw_ptr(cb), branch_size_on_entry, new_branch_size, ); // Return a pointer to the compiled block version - dst_addr.raw_ptr() + dst_addr.raw_ptr(cb) } -/// Set up a branch target at an index with a block version or a stub -fn set_branch_target( - target_idx: u32, - target: BlockId, - ctx: &Context, - branchref: &BranchRef, - branch: &mut Branch, - ocb: &mut OutlinedCb, -) { - let maybe_block = find_block_version(target, ctx); - - // If the block already exists - if let Some(blockref) = maybe_block { - let mut block = blockref.borrow_mut(); - - // Add an incoming branch into this block - block.push_incoming(branchref.clone()); +/// Part of branch_stub_hit(). +/// If we've hit a deferred branch, and the housing block consists solely of the branch, rewire +/// incoming branches to the new block and delete the housing block. 
+fn delete_empty_defer_block(branch: &Branch, new_block: &Block, target_ctx: Context, target_blockid: BlockId) +{ + // This &Block should be unique, relying on the VM lock + let housing_block: &Block = unsafe { branch.block.get().as_ref() }; + if target_ctx.is_deferred() && + target_blockid == housing_block.get_blockid() && + housing_block.outgoing.len() == 1 && + { + // The block is empty when iseq_range is one instruction long. + let range = &housing_block.iseq_range; + let iseq = housing_block.iseq.get(); + let start_opcode = iseq_opcode_at_idx(iseq, range.start.into()) as usize; + let empty_end = range.start + insn_len(start_opcode) as IseqIdx; + range.end == empty_end + } + { + // Divert incoming branches of housing_block to the new block + housing_block.incoming.for_each(|incoming| { + let incoming = unsafe { incoming.as_ref() }; + for target in 0..incoming.targets.len() { + // SAFETY: No cell mutation; copying out a BlockRef. + if Some(BlockRef::from(housing_block)) == unsafe { + incoming.targets[target] + .ref_unchecked() + .as_ref() + .and_then(|target| target.get_block()) + } { + incoming.targets[target].set(Some(Box::new(BranchTarget::Block(new_block.into())))); + } + } + new_block.push_incoming(incoming.into()); + }); - // Fill out the target with this block - branch.targets[target_idx.as_usize()] = Some(Box::new(BranchTarget::Block(blockref.clone()))); + // Transplant the branch we've just hit to the new block + mem::drop(housing_block.outgoing.0.take()); + new_block.outgoing.push(branch.into()); + let housing_block: BlockRef = branch.block.replace(new_block.into()); + // Free the old housing block; there should now be no live &Block. + remove_block_version(&housing_block); + unsafe { free_block(housing_block, false) }; - return; + incr_counter!(deleted_defer_block_count); } +} +/// Generate a "stub", a piece of code that calls the compiler back when run. +/// A piece of code that redeems for more code; a thunk for code. +fn gen_branch_stub( + ctx: u32, + iseq: IseqPtr, + ocb: &mut OutlinedCb, + branch_struct_address: usize, + target_idx: u32, +) -> Option<CodePtr> { let ocb = ocb.unwrap(); - // Generate an outlined stub that will call branch_stub_hit() - let stub_addr = ocb.get_write_ptr(); + let mut asm = Assembler::new(unsafe { get_iseq_body_local_table_size(iseq) }); + asm.ctx = Context::decode(ctx); + asm.set_reg_mapping(asm.ctx.reg_mapping); + asm_comment!(asm, "branch stub hit"); - // Get a raw pointer to the branch. We clone and then decrement the strong count which overall - // balances the strong count. We do this so that we're passing the result of [Rc::into_raw] to - // [Rc::from_raw] as required. - // We make sure the block housing the branch is still alive when branch_stub_hit() is running. - let branch_ptr: *const RefCell<Branch> = BranchRef::into_raw(branchref.clone()); - unsafe { BranchRef::decrement_strong_count(branch_ptr) }; + if asm.ctx.is_return_landing() { + asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); + let top = asm.stack_push(Type::Unknown); + asm.mov(top, C_RET_OPND); + } + + // Save caller-saved registers before C_ARG_OPNDS get clobbered. + // Spill all registers for consistency with the trampoline. 
+ for &reg in caller_saved_temp_regs() {
+ asm.cpush(Opnd::Reg(reg));
+ }

- let mut asm = Assembler::new();
- asm.comment("branch stub hit");
+ // Spill temps to the VM stack as well for jit.peek_at_stack()
+ asm.spill_regs();

 // Set up the arguments unique to this stub for:
- // branch_stub_hit(branch_ptr, target_idx, ec)
- asm.mov(C_ARG_OPNDS[0], Opnd::const_ptr(branch_ptr as *const u8));
+ //
+ // branch_stub_hit(branch_ptr, target_idx, ec)
+ //
+ // Bake pointer to Branch into output code.
+ // We make sure the block housing the branch is still alive when branch_stub_hit() is running.
+ asm.mov(C_ARG_OPNDS[0], branch_struct_address.into());
 asm.mov(C_ARG_OPNDS[1], target_idx.into());

 // Jump to trampoline to call branch_stub_hit()
 // Not really a side exit, just don't need a padded jump here.
 asm.jmp(CodegenGlobals::get_branch_stub_hit_trampoline().as_side_exit());

- asm.compile(ocb);
-
- if ocb.has_dropped_bytes() {
- // No space
- } else {
- // Fill the branch target with a stub
- branch.targets[target_idx.as_usize()] = Some(Box::new(BranchTarget::Stub(Box::new(BranchStub {
- address: Some(stub_addr),
- id: target,
- ctx: ctx.clone(),
- }))));
- }
+ asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
}

-pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> CodePtr {
+pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> {
 let ocb = ocb.unwrap();
- let code_ptr = ocb.get_write_ptr();
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new_without_iseq();

 // For `branch_stub_hit(branch_ptr, target_idx, ec)`,
- // `branch_ptr` and `target_idx` is different for each stub,
+ // `branch_ptr` and `target_idx` are different for each stub,
 // but the call and what's after is the same. This trampoline
 // is the unchanging part.
 // Since this trampoline is static, it allows code GC inside
 // branch_stub_hit() to free stubs without problems.
- asm.comment("branch_stub_hit() trampoline");
- let jump_addr = asm.ccall(
+ asm_comment!(asm, "branch_stub_hit() trampoline");
+ let stub_hit_ret = asm.ccall(
 branch_stub_hit as *mut u8,
 vec![
 C_ARG_OPNDS[0],
@@ -1998,226 +3813,303 @@ pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> CodePtr {
 EC,
 ]
 );
+ let jump_addr = asm.load(stub_hit_ret);
+
+ // Restore caller-saved registers for stack temps
+ for &reg in caller_saved_temp_regs().rev() {
+ asm.cpop_into(Opnd::Reg(reg));
+ }

 // Jump to the address returned by the branch_stub_hit() call
 asm.jmp_opnd(jump_addr);

- asm.compile(ocb);
+ // HACK: popping into C_RET_REG clobbers the return value of branch_stub_hit() we need to jump
+ // to, so we need a scratch register to preserve it. This extends the live range of the C
+ // return register so we get something else for the return value.
+ let _ = asm.live_reg_opnd(stub_hit_ret);

- code_ptr
+ asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
+}
+
+/// Return registers to be pushed and popped on branch_stub_hit.
+pub fn caller_saved_temp_regs() -> impl Iterator<Item = &'static Reg> + DoubleEndedIterator {
+ let temp_regs = Assembler::get_temp_regs().iter();
+ let len = temp_regs.len();
+ // The return value gen_leave() leaves in C_RET_REG
+ // needs to survive the branch_stub_hit() call.
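The register list assembled just below, including the C_RET_REG noted above, reduces to simple arithmetic. A sketch of the count, under the assumption that each push is 8 bytes and the x86-64 SysV ABI expects 16-byte stack alignment around calls:

    // Count how many registers end up pushed around the branch_stub_hit() call.
    fn pushes_around_call(temp_reg_count: usize) -> usize {
        let with_ret_reg = temp_reg_count + 1; // C_RET_REG is always chained in
        if cfg!(target_arch = "x86_64") && with_ret_reg % 2 == 1 {
            with_ret_reg + 1 // pad to an even count to keep SP 16-byte aligned
        } else {
            with_ret_reg
        }
    }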
+ let regs = temp_regs.chain(std::iter::once(&C_RET_REG)); + + // On x86_64, maintain 16-byte stack alignment + if cfg!(target_arch = "x86_64") && len % 2 == 0 { + static ONE_MORE: [Reg; 1] = [C_RET_REG]; + regs.chain(ONE_MORE.iter()) + } else { + regs.chain(&[]) + } } impl Assembler { + /// Mark the start position of a patchable entry point in the machine code + pub fn mark_entry_start(&mut self, entryref: &PendingEntryRef) { + // We need to create our own entry rc object + // so that we can move the closure below + let entryref = entryref.clone(); + + self.pos_marker(move |code_ptr, _| { + entryref.start_addr.set(Some(code_ptr)); + }); + } + + /// Mark the end position of a patchable entry point in the machine code + pub fn mark_entry_end(&mut self, entryref: &PendingEntryRef) { + // We need to create our own entry rc object + // so that we can move the closure below + let entryref = entryref.clone(); + + self.pos_marker(move |code_ptr, _| { + entryref.end_addr.set(Some(code_ptr)); + }); + } + // Mark the start position of a patchable branch in the machine code - fn mark_branch_start(&mut self, branchref: &BranchRef) + fn mark_branch_start(&mut self, branchref: &PendingBranchRef) { // We need to create our own branch rc object // so that we can move the closure below let branchref = branchref.clone(); - self.pos_marker(move |code_ptr| { - let mut branch = branchref.borrow_mut(); - branch.start_addr = Some(code_ptr); + self.pos_marker(move |code_ptr, _| { + branchref.start_addr.set(Some(code_ptr)); }); } // Mark the end position of a patchable branch in the machine code - fn mark_branch_end(&mut self, branchref: &BranchRef) + fn mark_branch_end(&mut self, branchref: &PendingBranchRef) { // We need to create our own branch rc object // so that we can move the closure below let branchref = branchref.clone(); - self.pos_marker(move |code_ptr| { - let mut branch = branchref.borrow_mut(); - branch.end_addr = Some(code_ptr); + self.pos_marker(move |code_ptr, _| { + branchref.end_addr.set(Some(code_ptr)); }); } } +#[must_use] pub fn gen_branch( - jit: &JITState, + jit: &mut JITState, asm: &mut Assembler, - ocb: &mut OutlinedCb, target0: BlockId, ctx0: &Context, target1: Option<BlockId>, ctx1: Option<&Context>, gen_fn: BranchGenFn, -) { - let branchref = make_branch_entry(&jit.get_block(), gen_fn); - let branch = &mut branchref.borrow_mut(); +) -> Option<()> { + let branch = new_pending_branch(jit, gen_fn); // Get the branch targets or stubs - set_branch_target(0, target0, ctx0, &branchref, branch, ocb); - if let Some(ctx) = ctx1 { - set_branch_target(1, target1.unwrap(), ctx, &branchref, branch, ocb); - if branch.targets[1].is_none() { - return; // avoid unwrap() in gen_fn() + let target0_addr = branch.set_target(0, target0, ctx0, jit)?; + let target1_addr = if let Some(ctx) = ctx1 { + let addr = branch.set_target(1, target1.unwrap(), ctx, jit); + if addr.is_none() { + // target1 requested but we're out of memory. 
+ // Avoid unwrap() in gen_fn() + return None; } - } - // Call the branch generation function - asm.mark_branch_start(&branchref); - if let Some(dst_addr) = branch.get_target_address(0) { - gen_fn(asm, dst_addr, branch.get_target_address(1), BranchShape::Default); - } - asm.mark_branch_end(&branchref); -} + addr + } else { None }; -fn gen_jump_branch( - asm: &mut Assembler, - target0: CodePtr, - _target1: Option<CodePtr>, - shape: BranchShape, -) { - if shape == BranchShape::Next1 { - panic!("Branch shape Next1 not allowed in gen_jump_branch!"); - } + // Call the branch generation function + asm.mark_branch_start(&branch); + branch.gen_fn.call(asm, Target::CodePtr(target0_addr), target1_addr.map(|addr| Target::CodePtr(addr))); + asm.mark_branch_end(&branch); - if shape == BranchShape::Default { - asm.jmp(target0.into()); - } + Some(()) } -pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, asm: &mut Assembler) { - let branchref = make_branch_entry(&jit.get_block(), gen_jump_branch); - let mut branch = branchref.borrow_mut(); - - let mut new_target = BranchTarget::Stub(Box::new(BranchStub { - address: None, - ctx: ctx.clone(), - id: target0, - })); - +pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm: &mut Assembler) { + let branch = new_pending_branch(jit, BranchGenFn::JumpToTarget0(Cell::new(BranchShape::Default))); let maybe_block = find_block_version(target0, ctx); // If the block already exists - if let Some(blockref) = maybe_block { - let mut block = blockref.borrow_mut(); - let block_addr = block.start_addr.unwrap(); - - block.push_incoming(branchref.clone()); - - new_target = BranchTarget::Block(blockref.clone()); - - branch.shape = BranchShape::Default; + let new_target = if let Some(blockref) = maybe_block { + let block = unsafe { blockref.as_ref() }; + let block_addr = block.start_addr; // Call the branch generation function - asm.comment("gen_direct_jmp: existing block"); - asm.mark_branch_start(&branchref); - gen_jump_branch(asm, block_addr, None, BranchShape::Default); - asm.mark_branch_end(&branchref); - } else { - // `None` in new_target.address signals gen_block_series() to compile the - // target block right after this one (fallthrough). - branch.shape = BranchShape::Next0; + asm_comment!(asm, "gen_direct_jmp: existing block"); + asm.mark_branch_start(&branch); + branch.gen_fn.call(asm, Target::CodePtr(block_addr), None); + asm.mark_branch_end(&branch); + BranchTarget::Block(blockref) + } else { // The branch is effectively empty (a noop) - asm.comment("gen_direct_jmp: fallthrough"); - asm.mark_branch_start(&branchref); - asm.mark_branch_end(&branchref); - } + asm_comment!(asm, "gen_direct_jmp: fallthrough"); + asm.mark_branch_start(&branch); + asm.mark_branch_end(&branch); + branch.gen_fn.set_shape(BranchShape::Next0); + + // `None` in new_target.address signals gen_block_series() to + // compile the target block right after this one (fallthrough). 
+ BranchTarget::Stub(Box::new(BranchStub { + address: None, + ctx: Context::encode(ctx), + iseq: Cell::new(target0.iseq), + iseq_idx: target0.idx, + })) + }; - branch.targets[0] = Some(Box::new(new_target)); + branch.targets[0].set(Some(Box::new(new_target))); } /// Create a stub to force the code up to this point to be executed -pub fn defer_compilation( - jit: &JITState, - cur_ctx: &Context, - asm: &mut Assembler, - ocb: &mut OutlinedCb, -) { - if cur_ctx.chain_depth != 0 { +pub fn defer_compilation(jit: &mut JITState, asm: &mut Assembler) -> Result<(), ()> { + if asm.ctx.is_deferred() { panic!("Double defer!"); } - let mut next_ctx = cur_ctx.clone(); + let mut next_ctx = asm.ctx; - if next_ctx.chain_depth == u8::MAX { - panic!("max block version chain depth reached!"); - } - next_ctx.chain_depth += 1; + next_ctx.mark_as_deferred(); - let block_rc = jit.get_block(); - let branch_rc = make_branch_entry(&jit.get_block(), gen_jump_branch); - let mut branch = branch_rc.borrow_mut(); - let block = block_rc.borrow(); + let branch = new_pending_branch(jit, BranchGenFn::JumpToTarget0(Cell::new(BranchShape::Default))); let blockid = BlockId { - iseq: block.blockid.iseq, + iseq: jit.get_iseq(), idx: jit.get_insn_idx(), }; - set_branch_target(0, blockid, &next_ctx, &branch_rc, &mut branch, ocb); + + // Likely a stub since the context is marked as deferred(). + let dst_addr = branch.set_target(0, blockid, &next_ctx, jit).ok_or(())?; + + // Pad the block if it has the potential to be invalidated. This must be + // done before gen_fn() in case the jump is overwritten by a fallthrough. + if jit.block_entry_exit.is_some() { + asm.pad_inval_patch(); + } // Call the branch generation function - asm.comment("defer_compilation"); - asm.mark_branch_start(&branch_rc); - if let Some(dst_addr) = branch.get_target_address(0) { - gen_jump_branch(asm, dst_addr, None, BranchShape::Default); + asm_comment!(asm, "defer_compilation"); + asm.mark_branch_start(&branch); + branch.gen_fn.call(asm, Target::CodePtr(dst_addr), None); + asm.mark_branch_end(&branch); + + // If the block we're deferring from is empty + if jit.get_starting_insn_idx() == jit.get_insn_idx() { + incr_counter!(defer_empty_count); } - asm.mark_branch_end(&branch_rc); incr_counter!(defer_count); + + Ok(()) } -fn remove_from_graph(blockref: &BlockRef) { - let block = blockref.borrow(); +/// Remove a block from the live control flow graph. +/// Block must be initialized and incoming/outgoing edges +/// must also point to initialized blocks. 
+unsafe fn remove_from_graph(blockref: BlockRef) { + let block = unsafe { blockref.as_ref() }; // Remove this block from the predecessor's targets - for pred_branchref in &block.incoming { + for pred_branchref in block.incoming.0.take().iter() { // Branch from the predecessor to us - let mut pred_branch = pred_branchref.borrow_mut(); + let pred_branch = unsafe { pred_branchref.as_ref() }; // If this is us, nullify the target block - for target_idx in 0..=1 { - if let Some(target) = pred_branch.targets[target_idx].as_ref() { - if target.get_block().as_ref() == Some(blockref) { - pred_branch.targets[target_idx] = None; - } + for target_idx in 0..pred_branch.targets.len() { + // SAFETY: no mutation inside unsafe + let target_is_us = unsafe { + pred_branch.targets[target_idx] + .ref_unchecked() + .as_ref() + .and_then(|target| target.get_block()) + .and_then(|target_block| (target_block == blockref).then(|| ())) + .is_some() + }; + + if target_is_us { + pred_branch.targets[target_idx].set(None); } } } // For each outgoing branch - for out_branchref in &block.outgoing { - let out_branch = out_branchref.borrow(); - + block.outgoing.for_each(|out_branchref| { + let out_branch = unsafe { out_branchref.as_ref() }; // For each successor block - for out_target in out_branch.targets.iter().flatten() { - if let Some(succ_blockref) = &out_target.get_block() { + for out_target in out_branch.targets.iter() { + // SAFETY: copying out an Option<BlockRef>. No mutation. + let succ_block: Option<BlockRef> = unsafe { + out_target.ref_unchecked().as_ref().and_then(|target| target.get_block()) + }; + + if let Some(succ_block) = succ_block { // Remove outgoing branch from the successor's incoming list - let mut succ_block = succ_blockref.borrow_mut(); - succ_block - .incoming - .retain(|succ_incoming| !Rc::ptr_eq(succ_incoming, out_branchref)); + // SAFETY: caller promises the block has valid outgoing edges. + let succ_block = unsafe { succ_block.as_ref() }; + // Temporarily move out of succ_block.incoming. + let succ_incoming = succ_block.incoming.0.take(); + let mut succ_incoming = succ_incoming.into_vec(); + succ_incoming.retain(|branch| *branch != out_branchref); + succ_block.incoming.0.set(succ_incoming.into_boxed_slice()); // allocs. Rely on oom=abort } } - } + }); } -/// Remove most references to a block to deallocate it. -/// Does not touch references from iseq payloads. -pub fn free_block(blockref: &BlockRef) { - block_assumptions_free(blockref); +/// Tear down a block and deallocate it. +/// Caller has to ensure that the code tracked by the block is not +/// running, as running code may hit [branch_stub_hit] who expects +/// [Branch] to be live. +/// +/// We currently ensure this through the `jit_cont` system in cont.c +/// and sometimes through the GC calling [rb_yjit_iseq_free]. The GC +/// has proven that an ISeq is not running if it calls us to free it. +/// +/// For delayed deallocation, since dead blocks don't keep +/// blocks they refer alive, by the time we get here their outgoing +/// edges may be dangling. Pass `graph_intact=false` such these cases. +pub unsafe fn free_block(blockref: BlockRef, graph_intact: bool) { + // Careful with order here. + // First, remove all pointers to the referent block + unsafe { + block_assumptions_free(blockref); + + if graph_intact { + remove_from_graph(blockref); + } + } - remove_from_graph(blockref); + // SAFETY: we should now have a unique pointer to the block + unsafe { dealloc_block(blockref) } +} - // Branches have a Rc pointing at the block housing them. 
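free_block() above and dealloc_block() below lean on one ownership convention. A sketch with an illustrative type, not YJIT's Block/Branch: metadata is leaked with Box::into_raw once generated code starts referring to it, and reclaimed with a matching Box::from_raw only after nothing can reach it anymore.

    use std::ptr::NonNull;

    struct Metadata {
        #[allow(dead_code)]
        payload: u64,
    }

    fn publish(meta: Box<Metadata>) -> NonNull<Metadata> {
        // Leak: machine code and other structs may hold this raw pointer.
        NonNull::new(Box::into_raw(meta)).expect("Box pointers are never null")
    }

    unsafe fn reclaim(meta: NonNull<Metadata>) {
        // SAFETY: the caller guarantees nothing still points at `meta`;
        // this Box::from_raw matches the Box::into_raw in `publish`.
        unsafe { drop(Box::from_raw(meta.as_ptr())) };
    }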
- // Break the cycle. - blockref.borrow_mut().incoming.clear(); - blockref.borrow_mut().outgoing.clear(); +/// Deallocate a block and its outgoing branches. Blocks own their outgoing branches. +/// Caller must ensure that we have unique ownership for the referent block +unsafe fn dealloc_block(blockref: BlockRef) { + unsafe { + for outgoing in blockref.as_ref().outgoing.0.take().iter() { + // this Box::from_raw matches the Box::into_raw from PendingBranch::into_branch + mem::drop(Box::from_raw(outgoing.as_ptr())); + } + } - // No explicit deallocation here as blocks are ref-counted. + // Deallocate the referent Block + unsafe { + // this Box::from_raw matches the Box::into_raw from JITState::into_block + mem::drop(Box::from_raw(blockref.as_ptr())); + } } // Some runtime checks for integrity of a program location pub fn verify_blockid(blockid: BlockId) { unsafe { assert!(rb_IMEMO_TYPE_P(blockid.iseq.into(), imemo_iseq) != 0); - assert!(blockid.idx < get_iseq_encoded_size(blockid.iseq)); + assert!(u32::from(blockid.idx) < get_iseq_encoded_size(blockid.iseq)); } } @@ -2228,20 +4120,21 @@ pub fn invalidate_block_version(blockref: &BlockRef) { // TODO: want to assert that all other ractors are stopped here. Can't patch // machine code that some other thread is running. - let block = blockref.borrow(); + let block = unsafe { (*blockref).as_ref() }; + let id_being_invalidated = block.get_blockid(); let mut cb = CodegenGlobals::get_inline_cb(); let ocb = CodegenGlobals::get_outlined_cb(); - verify_blockid(block.blockid); + verify_blockid(id_being_invalidated); #[cfg(feature = "disasm")] { // If dump_iseq_disasm is specified, print to console that blocks for matching ISEQ names were invalidated. if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() { - let blockid_idx = block.blockid.idx; - let iseq_location = iseq_get_location(block.blockid.iseq, blockid_idx); + let iseq_range = &block.iseq_range; + let iseq_location = iseq_get_location(block.iseq.get(), iseq_range.start); if iseq_location.contains(substr) { - println!("Invalidating block from {}, ISEQ offsets [{}, {})", iseq_location, blockid_idx, block.end_idx); + println!("Invalidating block from {}, ISEQ offsets [{}, {})", iseq_location, iseq_range.start, iseq_range.end); } } } @@ -2252,7 +4145,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) { // Get a pointer to the generated code for this block let block_start = block.start_addr; - // Make the the start of the block do an exit. This handles OOM situations + // Make the start of the block do an exit. This handles OOM situations // and some cases where we can't efficiently patch incoming branches. // Do this first, since in case there is a fallthrough branch into this // block, the patching loop below can overwrite the start of the block. @@ -2263,36 +4156,29 @@ pub fn invalidate_block_version(blockref: &BlockRef) { .entry_exit .expect("invalidation needs the entry_exit field"); { - let block_start = block - .start_addr - .expect("invalidation needs constructed block"); - let block_end = block - .end_addr - .expect("invalidation needs constructed block"); + let block_end = block.get_end_addr(); if block_start == block_entry_exit { // Some blocks exit on entry. Patching a jump to the entry at the // entry makes an infinite loop. } else { - // TODO(alan) - // if (block.start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) // Don't patch frozen code region - // Patch in a jump to block.entry_exit. 
let cur_pos = cb.get_write_ptr(); let cur_dropped_bytes = cb.has_dropped_bytes(); cb.set_write_ptr(block_start); - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); asm.jmp(block_entry_exit.as_side_exit()); cb.set_dropped_bytes(false); - asm.compile(&mut cb); + asm.compile(&mut cb, Some(ocb)).expect("can rewrite existing code"); assert!( cb.get_write_ptr() <= block_end, - "invalidation wrote past end of block (code_size: {:?}, new_size: {})", + "invalidation wrote past end of block (code_size: {:?}, new_size: {}, start_addr: {:?})", block.code_size(), - cb.get_write_ptr().into_i64() - block_start.into_i64(), + cb.get_write_ptr().as_offset() - block_start.as_offset(), + block.start_addr.raw_ptr(cb), ); cb.set_write_ptr(cur_pos); cb.set_dropped_bytes(cur_dropped_bytes); @@ -2300,64 +4186,92 @@ pub fn invalidate_block_version(blockref: &BlockRef) { } // For each incoming branch - for branchref in &block.incoming { - let mut branch = branchref.borrow_mut(); - let target_idx = if branch.get_target_address(0) == block_start { + let mut incoming_branches = block.incoming.0.take(); + + // An adjacent branch will write into the start of the block being invalidated, possibly + // overwriting the block's exit. If we run out of memory after doing this, any subsequent + // incoming branches we rewrite won't be able use the block's exit as a fallback when they + // are unable to generate a stub. To avoid this, if there's an incoming branch that's + // adjacent to the invalidated block, make sure we process it last. + let adjacent_branch_idx = incoming_branches.iter().position(|branchref| { + let branch = unsafe { branchref.as_ref() }; + let target_next = block.start_addr == branch.end_addr.get(); + target_next + }); + if let Some(adjacent_branch_idx) = adjacent_branch_idx { + incoming_branches.swap(adjacent_branch_idx, incoming_branches.len() - 1) + } + + for (i, branchref) in incoming_branches.iter().enumerate() { + let branch = unsafe { branchref.as_ref() }; + let target_idx = if branch.get_target_address(0) == Some(block_start) { 0 } else { 1 }; // Assert that the incoming branch indeed points to the block being invalidated - let incoming_target = branch.targets[target_idx].as_ref().unwrap(); - assert_eq!(block_start, incoming_target.get_address()); - if let Some(incoming_block) = &incoming_target.get_block() { - assert_eq!(blockref, incoming_block); - } - - // TODO(alan): - // Don't patch frozen code region - // if (branch.start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) { - // continue; - // } - - // Create a stub for this branch target or rewire it to a valid block - set_branch_target(target_idx as u32, block.blockid, &block.ctx, branchref, &mut branch, ocb); - - if branch.targets[target_idx].is_none() { - // We were unable to generate a stub (e.g. OOM). Use the block's - // exit instead of a stub for the block. It's important that we - // still patch the branch in this situation so stubs are unique - // to branches. Think about what could go wrong if we run out of - // memory in the middle of this loop. - branch.targets[target_idx] = Some(Box::new(BranchTarget::Stub(Box::new(BranchStub { - address: block.entry_exit, - id: block.blockid, - ctx: block.ctx.clone(), - })))); + // SAFETY: no mutation. 
+ unsafe { + let incoming_target = branch.targets[target_idx].ref_unchecked().as_ref().unwrap(); + assert_eq!(Some(block_start), incoming_target.get_address()); + if let Some(incoming_block) = &incoming_target.get_block() { + assert_eq!(blockref, incoming_block); + } } + // Create a stub for this branch target + let stub_addr = gen_branch_stub(block.ctx, block.iseq.get(), ocb, branchref.as_ptr() as usize, target_idx as u32); + + // In case we were unable to generate a stub (e.g. OOM). Use the block's + // exit instead of a stub for the block. It's important that we + // still patch the branch in this situation so stubs are unique + // to branches. Think about what could go wrong if we run out of + // memory in the middle of this loop. + let stub_addr = stub_addr.unwrap_or(block_entry_exit); + + // Fill the branch target with a stub + branch.targets[target_idx].set(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub { + address: Some(stub_addr), + iseq: block.iseq.clone(), + iseq_idx: block.iseq_range.start, + ctx: block.ctx, + }))))); + // Check if the invalidated block immediately follows - let target_next = block.start_addr == branch.end_addr; + let target_next = block.start_addr == branch.end_addr.get(); if target_next { - // The new block will no longer be adjacent. - // Note that we could be enlarging the branch and writing into the - // start of the block being invalidated. - branch.shape = BranchShape::Default; + if stub_addr != block.start_addr { + // The new block will no longer be adjacent. + // Note that we could be enlarging the branch and writing into the + // start of the block being invalidated. + branch.gen_fn.set_shape(BranchShape::Default); + } else { + // The branch target is still adjacent, so the branch must remain + // a fallthrough so we don't overwrite the target with a jump. + // + // This can happen if we're unable to generate a stub and the + // target block also exits on entry (block_start == block_entry_exit). + } } // Rewrite the branch with the new jump target address let old_branch_size = branch.code_size(); - regenerate_branch(cb, &mut branch); + regenerate_branch(cb, branch); if target_next && branch.end_addr > block.end_addr { panic!("yjit invalidate rewrote branch past end of invalidated block: {:?} (code_size: {})", branch, block.code_size()); } + let is_last_incoming_branch = i == incoming_branches.len() - 1; + if target_next && branch.end_addr.get() > block_entry_exit && !is_last_incoming_branch { + // We might still need to jump to this exit if we run out of memory when rewriting another incoming branch. + panic!("yjit invalidate rewrote branch over exit of invalidated block: {:?}", branch); + } if !target_next && branch.code_size() > old_branch_size { panic!( "invalidated branch grew in size (start_addr: {:?}, old_size: {}, new_size: {})", - branch.start_addr.unwrap().raw_ptr(), old_branch_size, branch.code_size() + branch.start_addr.raw_ptr(cb), old_branch_size, branch.code_size() ); } } @@ -2370,17 +4284,21 @@ pub fn invalidate_block_version(blockref: &BlockRef) { // points will always have an instruction index of 0. 
We'll need to // change this in the future when we support optional parameters because // they enter the function with a non-zero PC - if block.blockid.idx == 0 { + if block.iseq_range.start == 0 { // TODO: // We could reset the exec counter to zero in rb_iseq_reset_jit_func() // so that we eventually compile a new entry point when useful - unsafe { rb_iseq_reset_jit_func(block.blockid.iseq) }; + unsafe { rb_iseq_reset_jit_func(block.iseq.get()) }; } // FIXME: // Call continuation addresses on the stack can also be atomically replaced by jumps going to the stub. - delayed_deallocation(blockref); + // SAFETY: This block was in a version_map earlier + // in this function before we removed it, so it's well connected. + unsafe { remove_from_graph(*blockref) }; + + delayed_deallocation(*blockref); ocb.unwrap().mark_all_executable(); cb.mark_all_executable(); @@ -2388,29 +4306,57 @@ pub fn invalidate_block_version(blockref: &BlockRef) { incr_counter!(invalidation_count); } -// We cannot deallocate blocks immediately after invalidation since there -// could be stubs waiting to access branch pointers. Return stubs can do -// this since patching the code for setting up return addresses does not -// affect old return addresses that are already set up to use potentially -// invalidated branch pointers. Example: +// We cannot deallocate blocks immediately after invalidation since patching the code for setting +// up return addresses does not affect outstanding return addresses that are on stack and will use +// invalidated branch pointers when hit. Example: // def foo(n) // if n == 2 -// return 1.times { Object.define_method(:foo) {} } +// # 1.times.each to create a cfunc frame to preserve the JIT frame +// # which will return to a stub housed in an invalidated block +// return 1.times.each { Object.define_method(:foo) {} } // end // -// foo(n + 1) +// foo(n + 1) # The block for this call houses the return branch stub // end // p foo(1) -pub fn delayed_deallocation(blockref: &BlockRef) { +pub fn delayed_deallocation(blockref: BlockRef) { block_assumptions_free(blockref); - // We do this another time when we deem that it's safe - // to deallocate in case there is another Ractor waiting to acquire the - // VM lock inside branch_stub_hit(). - remove_from_graph(blockref); + let block = unsafe { blockref.as_ref() }; + // Set null ISEQ on the block to signal that it's dead. + let iseq = block.iseq.replace(ptr::null()); + let payload = get_iseq_payload(iseq).unwrap(); + payload.dead_blocks.push(blockref); +} - let payload = get_iseq_payload(blockref.borrow().blockid.iseq).unwrap(); - payload.dead_blocks.push(blockref.clone()); +trait RefUnchecked { + type Contained; + unsafe fn ref_unchecked(&self) -> &Self::Contained; +} + +impl<T> RefUnchecked for Cell<T> { + type Contained = T; + + /// Gives a reference to the contents of a [Cell]. + /// Dangerous; please include a SAFETY note. + /// + /// An easy way to use this without triggering Undefined Behavior is to + /// 1. ensure there is transitively no Cell/UnsafeCell mutation in the `unsafe` block + /// 2. ensure the `unsafe` block does not return any references, so our + /// analysis is lexically confined. This is trivially true if the block + /// returns a `bool`, for example. Aggregates that store references have + /// explicit lifetime parameters that look like `<'a>`. + /// + /// There are other subtler situations that don't follow these rules yet + /// are still sound. + /// See `test_miri_ref_unchecked()` for examples. 
You can play with it + /// with `cargo +nightly miri test miri`. + unsafe fn ref_unchecked(&self) -> &Self::Contained { + // SAFETY: pointer is dereferenceable because it's from a &Cell. + // It's up to the caller to follow aliasing rules with the output + // reference. + unsafe { self.as_ptr().as_ref().unwrap() } + } } #[cfg(test)] @@ -2418,31 +4364,240 @@ mod tests { use crate::core::*; #[test] + fn type_size() { + // Check that we can store types in 4 bits, + // and all local types in 32 bits + assert_eq!(mem::size_of::<Type>(), 1); + assert!(Type::BlockParamProxy as usize <= 0b1111); + assert!(MAX_CTX_LOCALS * 4 <= 32); + } + + #[test] + fn local_types() { + let mut ctx = Context::default(); + + for i in 0..MAX_CTX_LOCALS { + ctx.set_local_type(i, Type::Fixnum); + assert_eq!(ctx.get_local_type(i), Type::Fixnum); + ctx.set_local_type(i, Type::BlockParamProxy); + assert_eq!(ctx.get_local_type(i), Type::BlockParamProxy); + } + + ctx.set_local_type(0, Type::Fixnum); + ctx.clear_local_types(); + assert!(ctx.get_local_type(0) == Type::Unknown); + + // Make sure we don't accidentally set bits incorrectly + let mut ctx = Context::default(); + ctx.set_local_type(0, Type::Fixnum); + assert_eq!(ctx.get_local_type(0), Type::Fixnum); + ctx.set_local_type(2, Type::Fixnum); + ctx.set_local_type(1, Type::BlockParamProxy); + assert_eq!(ctx.get_local_type(0), Type::Fixnum); + assert_eq!(ctx.get_local_type(2), Type::Fixnum); + } + + #[test] fn types() { // Valid src => dst - assert_eq!(Type::Unknown.diff(Type::Unknown), 0); - assert_eq!(Type::UnknownImm.diff(Type::UnknownImm), 0); - assert_ne!(Type::UnknownImm.diff(Type::Unknown), usize::MAX); - assert_ne!(Type::Fixnum.diff(Type::Unknown), usize::MAX); - assert_ne!(Type::Fixnum.diff(Type::UnknownImm), usize::MAX); + assert_eq!(Type::Unknown.diff(Type::Unknown), TypeDiff::Compatible(0)); + assert_eq!(Type::UnknownImm.diff(Type::UnknownImm), TypeDiff::Compatible(0)); + assert_ne!(Type::UnknownImm.diff(Type::Unknown), TypeDiff::Incompatible); + assert_ne!(Type::Fixnum.diff(Type::Unknown), TypeDiff::Incompatible); + assert_ne!(Type::Fixnum.diff(Type::UnknownImm), TypeDiff::Incompatible); // Invalid src => dst - assert_eq!(Type::Unknown.diff(Type::UnknownImm), usize::MAX); - assert_eq!(Type::Unknown.diff(Type::Fixnum), usize::MAX); - assert_eq!(Type::Fixnum.diff(Type::UnknownHeap), usize::MAX); + assert_eq!(Type::Unknown.diff(Type::UnknownImm), TypeDiff::Incompatible); + assert_eq!(Type::Unknown.diff(Type::Fixnum), TypeDiff::Incompatible); + assert_eq!(Type::Fixnum.diff(Type::UnknownHeap), TypeDiff::Incompatible); + } + + #[test] + fn reg_mapping() { + let mut reg_mapping = RegMapping([None, None, None, None, None]); + + // 0 means every slot is not spilled + for stack_idx in 0..MAX_CTX_TEMPS as u8 { + assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(stack_idx)), None); + } + + // Set 0, 2, 6 (RegMapping: [Some(0), Some(6), Some(2), None, None]) + reg_mapping.alloc_reg(RegOpnd::Stack(0)); + reg_mapping.alloc_reg(RegOpnd::Stack(2)); + reg_mapping.alloc_reg(RegOpnd::Stack(3)); + reg_mapping.dealloc_reg(RegOpnd::Stack(3)); + reg_mapping.alloc_reg(RegOpnd::Stack(6)); + + // Get 0..8 + assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(0)), Some(0)); + assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(1)), None); + assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(2)), Some(2)); + assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(3)), None); + assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(4)), None); + assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(5)), None); + 
assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(6)), Some(1));
+        assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(7)), None);
     }
 
     #[test]
     fn context() {
         // Valid src => dst
-        assert_eq!(Context::default().diff(&Context::default()), 0);
+        assert_eq!(Context::default().diff(&Context::default()), TypeDiff::Compatible(0));
 
         // Try pushing an operand and getting its type
-        let mut ctx = Context::default();
-        ctx.stack_push(Type::Fixnum);
-        let top_type = ctx.get_opnd_type(StackOpnd(0));
+        let mut asm = Assembler::new(0);
+        asm.stack_push(Type::Fixnum);
+        let top_type = asm.ctx.get_opnd_type(StackOpnd(0));
         assert!(top_type == Type::Fixnum);
 
         // TODO: write more tests for Context type diff
     }
+
+    #[test]
+    fn context_upgrade_local() {
+        let mut asm = Assembler::new(0);
+        asm.stack_push_local(0);
+        asm.ctx.upgrade_opnd_type(StackOpnd(0), Type::Nil);
+        assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0)));
+    }
+
+    #[test]
+    fn context_chain_depth() {
+        let mut ctx = Context::default();
+        assert_eq!(ctx.get_chain_depth(), 0);
+        assert_eq!(ctx.is_return_landing(), false);
+        assert_eq!(ctx.is_deferred(), false);
+
+        for _ in 0..5 {
+            ctx.increment_chain_depth();
+        }
+        assert_eq!(ctx.get_chain_depth(), 5);
+
+        ctx.set_as_return_landing();
+        assert_eq!(ctx.is_return_landing(), true);
+
+        ctx.clear_return_landing();
+        assert_eq!(ctx.is_return_landing(), false);
+
+        ctx.mark_as_deferred();
+        assert_eq!(ctx.is_deferred(), true);
+
+        ctx.reset_chain_depth_and_defer();
+        assert_eq!(ctx.get_chain_depth(), 0);
+        assert_eq!(ctx.is_deferred(), false);
+    }
+
+    #[test]
+    fn shift_stack_for_send() {
+        let mut asm = Assembler::new(0);
+
+        // Push values to simulate send(:name, arg) with 6 items already on-stack
+        for _ in 0..6 {
+            asm.stack_push(Type::Fixnum);
+        }
+        asm.stack_push(Type::Unknown);
+        asm.stack_push(Type::ImmSymbol);
+        asm.stack_push(Type::Unknown);
+
+        // This method takes argc of the sendee, not argc of send
+        asm.shift_stack(1);
+
+        // The symbol should be gone
+        assert_eq!(Type::Unknown, asm.ctx.get_opnd_type(StackOpnd(0)));
+        assert_eq!(Type::Unknown, asm.ctx.get_opnd_type(StackOpnd(1)));
+    }
+
+    #[test]
+    fn test_miri_ref_unchecked() {
+        let blockid = BlockId {
+            iseq: ptr::null(),
+            idx: 0,
+        };
+        let cb = CodeBlock::new_dummy(1024);
+        let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(1024));
+        let dumm_addr = cb.get_write_ptr();
+        let block = JITState::new(blockid, Context::default(), dumm_addr, ptr::null(), &mut ocb, true)
+            .into_block(0, dumm_addr, dumm_addr, vec![]);
+        let _dropper = BlockDropper(block);
+
+        // Outside of brief moments during construction,
+        // we're always working with &Branch (a shared reference to a Branch).
+        let branch: &Branch = &Branch {
+            gen_fn: BranchGenFn::JZToTarget0,
+            block: Cell::new(block),
+            start_addr: dumm_addr,
+            end_addr: Cell::new(dumm_addr),
+            targets: [Cell::new(None), Cell::new(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub {
+                iseq: Cell::new(ptr::null()),
+                iseq_idx: 0,
+                address: None,
+                ctx: 0,
+            })))))]
+        };
+        // For easier soundness reasoning, make sure the reference returned does not outlive the
+        // `unsafe` block! It's tempting to do, but it leads to non-local issues.
+        // Here is an example where it goes wrong:
+        if false {
+            for target in branch.targets.iter().as_ref() {
+                if let Some(btarget) = unsafe { target.ref_unchecked() } {
+                    // btarget is derived from the unsafe block!
+                    target.set(None); // This drops the contents of the cell...
+                    assert!(btarget.get_address().is_none()); // but `btarget` is still live! UB.
+                }
+            }
+        }
+
+        // Do something like this instead. It's not pretty, but it's easier to vet for UB this way.
+        for target in branch.targets.iter().as_ref() {
+            // SAFETY: no mutation within unsafe
+            if unsafe { target.ref_unchecked().is_none() } {
+                continue;
+            }
+            // SAFETY: no mutation within unsafe
+            assert!(unsafe { target.ref_unchecked().as_ref().unwrap().get_address().is_none() });
+            target.set(None);
+        }
+
+        // A more subtle situation where we do Cell/UnsafeCell mutation over the
+        // lifetime of the reference released by ref_unchecked().
+        branch.targets[0].set(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub {
+            iseq: Cell::new(ptr::null()),
+            iseq_idx: 0,
+            address: None,
+            ctx: 0,
+        })))));
+        // Invalid ISeq; we never dereference it.
+        let secret_iseq = NonNull::<rb_iseq_t>::dangling().as_ptr();
+        unsafe {
+            if let Some(branch_target) = branch.targets[0].ref_unchecked().as_ref() {
+                if let BranchTarget::Stub(stub) = branch_target.as_ref() {
+                    // SAFETY:
+                    // This is a Cell mutation, but it mutates the contents
+                    // of a Cell<IseqPtr>, which is a different type
+                    // from the type of Cell found in `Branch::targets`, so
+                    // there is no chance of mutating the Cell that we called
+                    // ref_unchecked() on above.
+                    Cell::set(&stub.iseq, secret_iseq);
+                }
+            }
+        };
+        // Check that we indeed changed the iseq of the stub
+        // Cell::take moves out of the cell.
+        assert_eq!(
+            secret_iseq as usize,
+            branch.targets[0].take().unwrap().get_blockid().iseq as usize
+        );
+
+        struct BlockDropper(BlockRef);
+        impl Drop for BlockDropper {
+            fn drop(&mut self) {
+                // SAFETY: we have ownership because the test doesn't stash
+                // the block away in any global structure.
+                // Note that the test being self-contained is also why we
+                // use dealloc_block() over free_block(), as free_block() touches
+                // the global invariants tables unavailable in tests.
+                unsafe { dealloc_block(self.0) };
+            }
+        }
+    }
 }
diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs
index f4a6956926..d34b049a45 100644
--- a/yjit/src/cruby.rs
+++ b/yjit/src/cruby.rs
@@ -83,7 +83,8 @@ #![allow(non_upper_case_globals)]
 
 use std::convert::From;
-use std::ffi::CString;
+use std::ffi::{CString, CStr};
+use std::fmt::{Debug, Formatter};
 use std::os::raw::{c_char, c_int, c_uint};
 use std::panic::{catch_unwind, UnwindSafe};
 
@@ -96,7 +97,7 @@ pub type size_t = u64;
 pub type RedefinitionFlag = u32;
 
 #[allow(dead_code)]
-#[allow(clippy::useless_transmute)]
+#[allow(clippy::all)]
 mod autogened {
     use super::*;
     // Textually include output from rust-bindgen as suggested by its user guide.
@@ -107,13 +108,25 @@ pub use autogened::*;
 
 // TODO: For #defines that affect memory layout, we need to check for them
 // on build and fail if they're wrong. e.g. USE_FLONUM *must* be true.
 
-// These are functions we expose from vm_insnhelper.c, not in any header.
+// These are functions we expose from C files, not in any header.
 // Parsing it would result in a lot of duplicate definitions.
 // Use bindgen for functions that are defined in headers or in yjit.c.
 #[cfg_attr(test, allow(unused))] // We don't link against C code when testing
 extern "C" {
+    pub fn rb_check_overloaded_cme(
+        me: *const rb_callable_method_entry_t,
+        ci: *const rb_callinfo,
+    ) -> *const rb_callable_method_entry_t;
+
+    // Floats within range will be encoded without creating objects in the heap.
+    // (Range is 0x3000000000000001 to 0x4fffffffffffffff (1.7272337110188893E-77 to 2.3158417847463237E+77)).
+ pub fn rb_float_new(d: f64) -> VALUE; + + pub fn rb_hash_empty_p(hash: VALUE) -> VALUE; + pub fn rb_str_setbyte(str: VALUE, index: VALUE, value: VALUE) -> VALUE; pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE; pub fn rb_vm_concat_array(ary1: VALUE, ary2st: VALUE) -> VALUE; + pub fn rb_vm_concat_to_array(ary1: VALUE, ary2st: VALUE) -> VALUE; pub fn rb_vm_defined( ec: EcPtr, reg_cfp: CfpPtr, @@ -135,19 +148,20 @@ extern "C" { ic: ICVARC, ) -> VALUE; pub fn rb_vm_ic_hit_p(ic: IC, reg_ep: *const VALUE) -> bool; - pub fn rb_str_bytesize(str: VALUE) -> VALUE; + pub fn rb_vm_stack_canary() -> VALUE; + pub fn rb_vm_push_cfunc_frame(cme: *const rb_callable_method_entry_t, recv_idx: c_int); } // Renames pub use rb_insn_name as raw_insn_name; -pub use rb_insn_len as raw_insn_len; -pub use rb_yarv_class_of as CLASS_OF; pub use rb_get_ec_cfp as get_ec_cfp; +pub use rb_get_cfp_iseq as get_cfp_iseq; pub use rb_get_cfp_pc as get_cfp_pc; pub use rb_get_cfp_sp as get_cfp_sp; pub use rb_get_cfp_self as get_cfp_self; pub use rb_get_cfp_ep as get_cfp_ep; pub use rb_get_cfp_ep_level as get_cfp_ep_level; +pub use rb_vm_base_ptr as get_cfp_bp; pub use rb_get_cme_def_type as get_cme_def_type; pub use rb_get_cme_def_body_attr_id as get_cme_def_body_attr_id; pub use rb_get_cme_def_body_optimized_type as get_cme_def_body_optimized_type; @@ -162,11 +176,11 @@ pub use rb_iseq_encoded_size as get_iseq_encoded_size; pub use rb_get_iseq_body_local_iseq as get_iseq_body_local_iseq; pub use rb_get_iseq_body_iseq_encoded as get_iseq_body_iseq_encoded; pub use rb_get_iseq_body_stack_max as get_iseq_body_stack_max; +pub use rb_get_iseq_body_type as get_iseq_body_type; pub use rb_get_iseq_flags_has_lead as get_iseq_flags_has_lead; pub use rb_get_iseq_flags_has_opt as get_iseq_flags_has_opt; pub use rb_get_iseq_flags_has_kw as get_iseq_flags_has_kw; pub use rb_get_iseq_flags_has_rest as get_iseq_flags_has_rest; -pub use rb_get_iseq_flags_ruby2_keywords as get_iseq_flags_ruby2_keywords; pub use rb_get_iseq_flags_has_post as get_iseq_flags_has_post; pub use rb_get_iseq_flags_has_kwrest as get_iseq_flags_has_kwrest; pub use rb_get_iseq_flags_has_block as get_iseq_flags_has_block; @@ -183,7 +197,8 @@ pub use rb_get_cikw_keywords_idx as get_cikw_keywords_idx; pub use rb_get_call_data_ci as get_call_data_ci; pub use rb_yarv_str_eql_internal as rb_str_eql_internal; pub use rb_yarv_ary_entry_internal as rb_ary_entry_internal; -pub use rb_yarv_fix_mod_fix as rb_fix_mod_fix; +pub use rb_jit_fix_div_fix as rb_fix_div_fix; +pub use rb_jit_fix_mod_fix as rb_fix_mod_fix; pub use rb_FL_TEST as FL_TEST; pub use rb_FL_TEST_RAW as FL_TEST_RAW; pub use rb_RB_TYPE_P as RB_TYPE_P; @@ -199,8 +214,6 @@ pub use rb_RCLASS_ORIGIN as RCLASS_ORIGIN; /// Helper so we can get a Rust string for insn_name() pub fn insn_name(opcode: usize) -> String { - use std::ffi::CStr; - unsafe { // Look up Ruby's NULL-terminated insn name string let op_name = raw_insn_name(VALUE(opcode)); @@ -220,7 +233,7 @@ pub fn insn_len(opcode: usize) -> u32 { #[cfg(not(test))] unsafe { - raw_insn_len(VALUE(opcode)).try_into().unwrap() + rb_insn_len(VALUE(opcode)).try_into().unwrap() } } @@ -243,6 +256,30 @@ pub struct VALUE(pub usize); /// Pointer to an ISEQ pub type IseqPtr = *const rb_iseq_t; +// Given an ISEQ pointer, convert PC to insn_idx +pub fn iseq_pc_to_insn_idx(iseq: IseqPtr, pc: *mut VALUE) -> Option<u16> { + let pc_zero = unsafe { rb_iseq_pc_at_idx(iseq, 0) }; + unsafe { pc.offset_from(pc_zero) }.try_into().ok() +} + +/// Given an ISEQ pointer and an 
instruction index, return an opcode. +pub fn iseq_opcode_at_idx(iseq: IseqPtr, insn_idx: u32) -> u32 { + let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) }; + unsafe { rb_iseq_opcode_at_pc(iseq, pc) as u32 } +} + +/// Return a poison value to be set above the stack top to verify leafness. +#[cfg(not(test))] +pub fn vm_stack_canary() -> u64 { + unsafe { rb_vm_stack_canary() }.as_u64() +} + +/// Avoid linking the C function in `cargo test` +#[cfg(test)] +pub fn vm_stack_canary() -> u64 { + 0 +} + /// Opaque execution-context type from vm_core.h #[repr(C)] pub struct rb_execution_context_struct { @@ -277,13 +314,6 @@ pub struct rb_callcache { _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, } -/// Opaque call-info type from vm_callinfo.h -#[repr(C)] -pub struct rb_callinfo_kwarg { - _data: [u8; 0], - _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, -} - /// Opaque control_frame (CFP) struct from vm_core.h #[repr(C)] pub struct rb_control_frame_struct { @@ -331,6 +361,11 @@ impl VALUE { !self.special_const_p() } + /// Shareability between ractors. `RB_OBJ_SHAREABLE_P()`. + pub fn shareable_p(self) -> bool { + (self.builtin_flags() & RUBY_FL_SHAREABLE as usize) != 0 + } + /// Return true if the value is a Ruby Fixnum (immediate-size integer) pub fn fixnum_p(self) -> bool { let VALUE(cval) = self; @@ -367,6 +402,11 @@ impl VALUE { } } + /// Returns true if the value is T_HASH + pub fn hash_p(self) -> bool { + !self.special_const_p() && self.builtin_type() == RUBY_T_HASH + } + /// Returns true or false depending on whether the value is nil pub fn nil_p(self) -> bool { self == Qnil @@ -391,7 +431,13 @@ impl VALUE { } pub fn class_of(self) -> VALUE { - unsafe { CLASS_OF(self) } + if !self.special_const_p() { + let builtin_type = self.builtin_type(); + assert_ne!(builtin_type, RUBY_T_NONE, "YJIT should only see live objects"); + assert_ne!(builtin_type, RUBY_T_MOVED, "YJIT should only see live objects"); + } + + unsafe { rb_yarv_class_of(self) } } pub fn is_frozen(self) -> bool { @@ -399,28 +445,16 @@ impl VALUE { } pub fn shape_too_complex(self) -> bool { - unsafe { rb_shape_obj_too_complex(self) } + unsafe { rb_yjit_shape_obj_too_complex_p(self) } } pub fn shape_id_of(self) -> u32 { - unsafe { rb_shape_get_shape_id(self) } - } - - pub fn shape_of(self) -> *mut rb_shape { - unsafe { - let shape = rb_shape_get_shape_by_id(self.shape_id_of()); - - if shape.is_null() { - panic!("Shape should not be null"); - } else { - shape - } - } + unsafe { rb_obj_shape_id(self) } } pub fn embedded_p(self) -> bool { unsafe { - FL_TEST_RAW(self, VALUE(ROBJECT_EMBED as usize)) != VALUE(0) + FL_TEST_RAW(self, VALUE(ROBJECT_HEAP as usize)) == VALUE(0) } } @@ -463,7 +497,7 @@ impl VALUE { us as *mut T } - /// For working with opague pointers and encoding null check. + /// For working with opaque pointers and encoding null check. /// Similar to [std::ptr::NonNull], but for `*const T`. `NonNull<T>` /// is for `*mut T` while our C functions are setup to use `*const T`. /// Casting from `NonNull<T>` to `*const T` is too noisy. 
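// A minimal sketch of the "encode the NULL check" idea described in the doc
// comment above: wrap a raw `*const T` so callers get an Option instead of a
// possibly-NULL pointer. The `OpaquePtr` type and `as_optional` method here are
// hypothetical stand-ins for illustration; the actual helper on VALUE in
// cruby.rs may be named and shaped differently.
struct OpaquePtr<T>(*const T);

impl<T> OpaquePtr<T> {
    // Do the NULL comparison once, here, so call sites can pattern match on
    // Option instead of remembering to compare against NULL themselves.
    fn as_optional(&self) -> Option<*const T> {
        if self.0.is_null() { None } else { Some(self.0) }
    }
}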
@@ -500,9 +534,7 @@ impl VALUE { ptr } -} -impl VALUE { pub fn fixnum_from_usize(item: usize) -> Self { assert!(item <= (RUBY_FIXNUM_MAX as usize)); // An unsigned will always be greater than RUBY_FIXNUM_MIN let k: usize = item.wrapping_add(item.wrapping_add(1)); @@ -524,6 +556,18 @@ impl From<*const rb_callable_method_entry_t> for VALUE { } } +impl From<&str> for VALUE { + fn from(value: &str) -> Self { + rust_str_to_ruby(value) + } +} + +impl From<String> for VALUE { + fn from(value: String) -> Self { + rust_str_to_ruby(&value) + } +} + impl From<VALUE> for u64 { fn from(value: VALUE) -> Self { let VALUE(uimm) = value; @@ -555,33 +599,58 @@ impl From<VALUE> for u16 { } /// Produce a Ruby string from a Rust string slice -#[cfg(feature = "disasm")] pub fn rust_str_to_ruby(str: &str) -> VALUE { unsafe { rb_utf8_str_new(str.as_ptr() as *const _, str.len() as i64) } } /// Produce a Ruby symbol from a Rust string slice pub fn rust_str_to_sym(str: &str) -> VALUE { + let id = rust_str_to_id(str); + unsafe { rb_id2sym(id) } +} + +/// Produce an ID from a Rust string slice +pub fn rust_str_to_id(str: &str) -> ID { let c_str = CString::new(str).unwrap(); let c_ptr: *const c_char = c_str.as_ptr(); + unsafe { rb_intern(c_ptr) } +} + +/// Produce an owned Rust String from a C char pointer +pub fn cstr_to_rust_string(c_char_ptr: *const c_char) -> Option<String> { + assert!(c_char_ptr != std::ptr::null()); + + let c_str: &CStr = unsafe { CStr::from_ptr(c_char_ptr) }; - unsafe { rb_id2sym(rb_intern(c_ptr)) } + match c_str.to_str() { + Ok(rust_str) => Some(rust_str.to_string()), + Err(_) => None + } } /// A location in Rust code for integrating with debugging facilities defined in C. /// Use the [src_loc!] macro to crate an instance. pub struct SourceLocation { - pub file: CString, + pub file: &'static CStr, pub line: c_int, } +impl Debug for SourceLocation { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("{}:{}", self.file.to_string_lossy(), self.line)) + } +} + /// Make a [SourceLocation] at the current spot. macro_rules! src_loc { () => { - // NOTE(alan): `CString::new` allocates so we might want to limit this to debug builds. - $crate::cruby::SourceLocation { - file: std::ffi::CString::new(file!()).unwrap(), // ASCII source file paths - line: line!().try_into().unwrap(), // not that many lines + { + // Nul-terminated string with static lifetime, make a CStr out of it safely. + let file: &'static str = concat!(file!(), '\0'); + $crate::cruby::SourceLocation { + file: unsafe { std::ffi::CStr::from_ptr(file.as_ptr().cast()) }, + line: line!().try_into().unwrap(), + } } }; } @@ -612,28 +681,27 @@ where let line = loc.line; let mut recursive_lock_level: c_uint = 0; - unsafe { rb_yjit_vm_lock_then_barrier(&mut recursive_lock_level, file, line) }; + unsafe { rb_jit_vm_lock_then_barrier(&mut recursive_lock_level, file, line) }; let ret = match catch_unwind(func) { Ok(result) => result, Err(_) => { // Theoretically we can recover from some of these panics, // but it's too late if the unwind reaches here. - use std::{process, str}; let _ = catch_unwind(|| { // IO functions can panic too. eprintln!( "YJIT panicked while holding VM lock acquired at {}:{}. 
Aborting...", - str::from_utf8(loc.file.as_bytes()).unwrap_or("<not utf8>"), + loc.file.to_string_lossy(), line, ); }); - process::abort(); + std::process::abort(); } }; - unsafe { rb_yjit_vm_unlock(&mut recursive_lock_level, file, line) }; + unsafe { rb_jit_vm_unlock(&mut recursive_lock_level, file, line) }; ret } @@ -663,8 +731,10 @@ mod manual_defs { pub const RUBY_FIXNUM_MAX: isize = RUBY_LONG_MAX / 2; // From vm_callinfo.h - uses calculation that seems to confuse bindgen + pub const VM_CALL_ARGS_SIMPLE: u32 = 1 << VM_CALL_ARGS_SIMPLE_bit; pub const VM_CALL_ARGS_SPLAT: u32 = 1 << VM_CALL_ARGS_SPLAT_bit; pub const VM_CALL_ARGS_BLOCKARG: u32 = 1 << VM_CALL_ARGS_BLOCKARG_bit; + pub const VM_CALL_FORWARDING: u32 = 1 << VM_CALL_FORWARDING_bit; pub const VM_CALL_FCALL: u32 = 1 << VM_CALL_FCALL_bit; pub const VM_CALL_KWARG: u32 = 1 << VM_CALL_KWARG_bit; pub const VM_CALL_KW_SPLAT: u32 = 1 << VM_CALL_KW_SPLAT_bit; @@ -673,7 +743,7 @@ mod manual_defs { pub const VM_CALL_OPT_SEND : u32 = 1 << VM_CALL_OPT_SEND_bit; // From internal/struct.h - in anonymous enum, so we can't easily import it - pub const RSTRUCT_EMBED_LEN_MASK: usize = (RUBY_FL_USER2 | RUBY_FL_USER1) as usize; + pub const RSTRUCT_EMBED_LEN_MASK: usize = (RUBY_FL_USER7 | RUBY_FL_USER6 | RUBY_FL_USER5 | RUBY_FL_USER4 | RUBY_FL_USER3 |RUBY_FL_USER2 | RUBY_FL_USER1) as usize; // From iseq.h - via a different constant, which seems to confuse bindgen pub const ISEQ_TRANSLATED: usize = RUBY_FL_USER7 as usize; @@ -689,6 +759,9 @@ mod manual_defs { pub const RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR: i32 = 24; // struct RStruct, subfield "as.heap.ptr" pub const RUBY_OFFSET_RSTRUCT_AS_ARY: i32 = 16; // struct RStruct, subfield "as.ary" + pub const RUBY_OFFSET_RSTRING_AS_HEAP_PTR: i32 = 24; // struct RString, subfield "as.heap.ptr" + pub const RUBY_OFFSET_RSTRING_AS_ARY: i32 = 24; // struct RString, subfield "as.embed.ary" + // Constants from rb_control_frame_t vm_core.h pub const RUBY_OFFSET_CFP_PC: i32 = 0; pub const RUBY_OFFSET_CFP_SP: i32 = 8; @@ -696,15 +769,8 @@ mod manual_defs { pub const RUBY_OFFSET_CFP_SELF: i32 = 24; pub const RUBY_OFFSET_CFP_EP: i32 = 32; pub const RUBY_OFFSET_CFP_BLOCK_CODE: i32 = 40; - pub const RUBY_OFFSET_CFP_BP: i32 = 48; // field __bp__ - pub const RUBY_OFFSET_CFP_JIT_RETURN: i32 = 56; - pub const RUBY_SIZEOF_CONTROL_FRAME: usize = 64; - - // Constants from rb_execution_context_t vm_core.h - pub const RUBY_OFFSET_EC_CFP: i32 = 16; - pub const RUBY_OFFSET_EC_INTERRUPT_FLAG: i32 = 32; // rb_atomic_t (u32) - pub const RUBY_OFFSET_EC_INTERRUPT_MASK: i32 = 36; // rb_atomic_t (u32) - pub const RUBY_OFFSET_EC_THREAD_PTR: i32 = 48; + pub const RUBY_OFFSET_CFP_JIT_RETURN: i32 = 48; + pub const RUBY_SIZEOF_CONTROL_FRAME: usize = 56; // Constants from rb_thread_t in vm_core.h pub const RUBY_OFFSET_THREAD_SELF: i32 = 16; @@ -714,3 +780,52 @@ mod manual_defs { pub const RUBY_OFFSET_ICE_VALUE: i32 = 8; } pub use manual_defs::*; + +/// Interned ID values for Ruby symbols and method names. +/// See [type@crate::cruby::ID] and usages outside of YJIT. +pub(crate) mod ids { + use std::sync::atomic::AtomicU64; + /// Globals to cache IDs on boot. Atomic to use with relaxed ordering + /// so reads can happen without `unsafe`. Synchronization done through + /// the VM lock. + macro_rules! 
def_ids { + ($(name: $ident:ident content: $str:literal)*) => { + $( + #[doc = concat!("[type@crate::cruby::ID] for `", stringify!($str), "`")] + pub static $ident: AtomicU64 = AtomicU64::new(0); + )* + + pub(crate) fn init() { + $( + let content = &$str; + let ptr: *const u8 = content.as_ptr(); + + // Lookup and cache each ID + $ident.store( + unsafe { $crate::cruby::rb_intern2(ptr.cast(), content.len() as _) }, + std::sync::atomic::Ordering::Relaxed + ); + )* + + } + } + } + + def_ids! { + name: NULL content: b"" + name: respond_to_missing content: b"respond_to_missing?" + name: method_missing content: b"method_missing" + name: to_ary content: b"to_ary" + name: to_s content: b"to_s" + name: eq content: b"==" + name: include_p content: b"include?" + } +} + +/// Get an CRuby `ID` to an interned string, e.g. a particular method name. +macro_rules! ID { + ($id_name:ident) => { + $crate::cruby::ids::$id_name.load(std::sync::atomic::Ordering::Relaxed) + } +} +pub(crate) use ID; diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index b8a8c91f38..56994388a3 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -1,4 +1,4 @@ -/* automatically generated by rust-bindgen 0.63.0 */ +/* automatically generated by rust-bindgen 0.70.1 */ #[repr(C)] #[derive(Copy, Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] @@ -81,6 +81,36 @@ where } } #[repr(C)] +#[derive(Default)] +pub struct __IncompleteArrayField<T>(::std::marker::PhantomData<T>, [T; 0]); +impl<T> __IncompleteArrayField<T> { + #[inline] + pub const fn new() -> Self { + __IncompleteArrayField(::std::marker::PhantomData, []) + } + #[inline] + pub fn as_ptr(&self) -> *const T { + self as *const _ as *const T + } + #[inline] + pub fn as_mut_ptr(&mut self) -> *mut T { + self as *mut _ as *mut T + } + #[inline] + pub unsafe fn as_slice(&self, len: usize) -> &[T] { + ::std::slice::from_raw_parts(self.as_ptr(), len) + } + #[inline] + pub unsafe fn as_mut_slice(&mut self, len: usize) -> &mut [T] { + ::std::slice::from_raw_parts_mut(self.as_mut_ptr(), len) + } +} +impl<T> ::std::fmt::Debug for __IncompleteArrayField<T> { + fn fmt(&self, fmt: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + fmt.write_str("__IncompleteArrayField") + } +} +#[repr(C)] pub struct __BindgenUnionField<T>(::std::marker::PhantomData<T>); impl<T> __BindgenUnionField<T> { #[inline] @@ -105,7 +135,7 @@ impl<T> ::std::default::Default for __BindgenUnionField<T> { impl<T> ::std::clone::Clone for __BindgenUnionField<T> { #[inline] fn clone(&self) -> Self { - Self::new() + *self } } impl<T> ::std::marker::Copy for __BindgenUnionField<T> {} @@ -123,8 +153,6 @@ impl<T> ::std::cmp::PartialEq for __BindgenUnionField<T> { } } impl<T> ::std::cmp::Eq for __BindgenUnionField<T> {} -pub const SHAPE_ID_NUM_BITS: u32 = 32; -pub const OBJ_TOO_COMPLEX_SHAPE_ID: u32 = 11; pub const INTEGER_REDEFINED_OP_FLAG: u32 = 1; pub const FLOAT_REDEFINED_OP_FLAG: u32 = 2; pub const STRING_REDEFINED_OP_FLAG: u32 = 4; @@ -137,11 +165,13 @@ pub const NIL_REDEFINED_OP_FLAG: u32 = 512; pub const TRUE_REDEFINED_OP_FLAG: u32 = 1024; pub const FALSE_REDEFINED_OP_FLAG: u32 = 2048; pub const PROC_REDEFINED_OP_FLAG: u32 = 4096; +pub const VM_KW_SPECIFIED_BITS_MAX: u32 = 31; pub const VM_ENV_DATA_SIZE: u32 = 3; pub const VM_ENV_DATA_INDEX_ME_CREF: i32 = -2; pub const VM_ENV_DATA_INDEX_SPECVAL: i32 = -1; pub const VM_ENV_DATA_INDEX_FLAGS: u32 = 0; pub const VM_BLOCK_HANDLER_NONE: u32 = 0; +pub const SHAPE_ID_NUM_BITS: u32 = 32; pub type ID = 
::std::os::raw::c_ulong; pub type rb_alloc_func_t = ::std::option::Option<unsafe extern "C" fn(klass: VALUE) -> VALUE>; pub const RUBY_Qfalse: ruby_special_consts = 0; @@ -192,15 +222,13 @@ pub type ruby_value_type = u32; pub const RUBY_FL_USHIFT: ruby_fl_ushift = 12; pub type ruby_fl_ushift = u32; pub const RUBY_FL_WB_PROTECTED: ruby_fl_type = 32; -pub const RUBY_FL_PROMOTED0: ruby_fl_type = 32; -pub const RUBY_FL_PROMOTED1: ruby_fl_type = 64; -pub const RUBY_FL_PROMOTED: ruby_fl_type = 96; +pub const RUBY_FL_PROMOTED: ruby_fl_type = 32; +pub const RUBY_FL_USERPRIV0: ruby_fl_type = 64; pub const RUBY_FL_FINALIZE: ruby_fl_type = 128; -pub const RUBY_FL_TAINT: ruby_fl_type = 256; +pub const RUBY_FL_EXIVAR: ruby_fl_type = 0; pub const RUBY_FL_SHAREABLE: ruby_fl_type = 256; -pub const RUBY_FL_UNTRUSTED: ruby_fl_type = 256; -pub const RUBY_FL_SEEN_OBJ_ID: ruby_fl_type = 512; -pub const RUBY_FL_EXIVAR: ruby_fl_type = 1024; +pub const RUBY_FL_WEAK_REFERENCE: ruby_fl_type = 512; +pub const RUBY_FL_UNUSED10: ruby_fl_type = 1024; pub const RUBY_FL_FREEZE: ruby_fl_type = 2048; pub const RUBY_FL_USER0: ruby_fl_type = 4096; pub const RUBY_FL_USER1: ruby_fl_type = 8192; @@ -222,8 +250,8 @@ pub const RUBY_FL_USER16: ruby_fl_type = 268435456; pub const RUBY_FL_USER17: ruby_fl_type = 536870912; pub const RUBY_FL_USER18: ruby_fl_type = 1073741824; pub const RUBY_FL_USER19: ruby_fl_type = -2147483648; -pub const RUBY_ELTS_SHARED: ruby_fl_type = 16384; -pub const RUBY_FL_SINGLETON: ruby_fl_type = 4096; +pub const RUBY_ELTS_SHARED: ruby_fl_type = 4096; +pub const RUBY_FL_SINGLETON: ruby_fl_type = 8192; pub type ruby_fl_type = i32; pub const RSTRING_NOEMBED: ruby_rstring_flags = 8192; pub const RSTRING_FSTR: ruby_rstring_flags = 536870912; @@ -245,17 +273,13 @@ pub type st_foreach_callback_func = ::std::option::Option< >; pub const RARRAY_EMBED_FLAG: ruby_rarray_flags = 8192; pub const RARRAY_EMBED_LEN_MASK: ruby_rarray_flags = 4161536; -pub const RARRAY_TRANSIENT_FLAG: ruby_rarray_flags = 33554432; pub type ruby_rarray_flags = u32; pub const RARRAY_EMBED_LEN_SHIFT: ruby_rarray_consts = 15; pub type ruby_rarray_consts = u32; -pub const RMODULE_IS_REFINEMENT: ruby_rmodule_flags = 32768; +pub const RMODULE_IS_REFINEMENT: ruby_rmodule_flags = 8192; pub type ruby_rmodule_flags = u32; -pub const ROBJECT_EMBED: ruby_robject_flags = 8192; +pub const ROBJECT_HEAP: ruby_robject_flags = 65536; pub type ruby_robject_flags = u32; -pub const ROBJECT_OFFSET_AS_HEAP_IVPTR: i32 = 16; -pub const ROBJECT_OFFSET_AS_HEAP_IV_INDEX_TBL: i32 = 24; -pub const ROBJECT_OFFSET_AS_ARY: i32 = 16; pub type rb_block_call_func = ::std::option::Option< unsafe extern "C" fn( yielded_arg: VALUE, @@ -285,233 +309,6 @@ pub const RUBY_ENCINDEX_EUC_JP: ruby_preserved_encindex = 10; pub const RUBY_ENCINDEX_Windows_31J: ruby_preserved_encindex = 11; pub const RUBY_ENCINDEX_BUILTIN_MAX: ruby_preserved_encindex = 12; pub type ruby_preserved_encindex = u32; -pub type attr_index_t = u32; -pub type shape_id_t = u32; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct rb_shape { - pub edges: *mut rb_id_table, - pub edge_name: ID, - pub next_iv_index: attr_index_t, - pub capacity: u32, - pub type_: u8, - pub size_pool_index: u8, - pub parent_id: shape_id_t, -} -pub type rb_shape_t = rb_shape; -pub const idDot2: ruby_method_ids = 128; -pub const idDot3: ruby_method_ids = 129; -pub const idUPlus: ruby_method_ids = 132; -pub const idUMinus: ruby_method_ids = 133; -pub const idPow: ruby_method_ids = 134; -pub const idCmp: ruby_method_ids = 135; -pub const 
idPLUS: ruby_method_ids = 43; -pub const idMINUS: ruby_method_ids = 45; -pub const idMULT: ruby_method_ids = 42; -pub const idDIV: ruby_method_ids = 47; -pub const idMOD: ruby_method_ids = 37; -pub const idLTLT: ruby_method_ids = 136; -pub const idGTGT: ruby_method_ids = 137; -pub const idLT: ruby_method_ids = 60; -pub const idLE: ruby_method_ids = 138; -pub const idGT: ruby_method_ids = 62; -pub const idGE: ruby_method_ids = 139; -pub const idEq: ruby_method_ids = 140; -pub const idEqq: ruby_method_ids = 141; -pub const idNeq: ruby_method_ids = 142; -pub const idNot: ruby_method_ids = 33; -pub const idAnd: ruby_method_ids = 38; -pub const idOr: ruby_method_ids = 124; -pub const idBackquote: ruby_method_ids = 96; -pub const idEqTilde: ruby_method_ids = 143; -pub const idNeqTilde: ruby_method_ids = 144; -pub const idAREF: ruby_method_ids = 145; -pub const idASET: ruby_method_ids = 146; -pub const idCOLON2: ruby_method_ids = 147; -pub const idANDOP: ruby_method_ids = 148; -pub const idOROP: ruby_method_ids = 149; -pub const idANDDOT: ruby_method_ids = 150; -pub const tPRESERVED_ID_BEGIN: ruby_method_ids = 150; -pub const idNilP: ruby_method_ids = 151; -pub const idNULL: ruby_method_ids = 152; -pub const idEmptyP: ruby_method_ids = 153; -pub const idEqlP: ruby_method_ids = 154; -pub const idRespond_to: ruby_method_ids = 155; -pub const idRespond_to_missing: ruby_method_ids = 156; -pub const idIFUNC: ruby_method_ids = 157; -pub const idCFUNC: ruby_method_ids = 158; -pub const id_core_set_method_alias: ruby_method_ids = 159; -pub const id_core_set_variable_alias: ruby_method_ids = 160; -pub const id_core_undef_method: ruby_method_ids = 161; -pub const id_core_define_method: ruby_method_ids = 162; -pub const id_core_define_singleton_method: ruby_method_ids = 163; -pub const id_core_set_postexe: ruby_method_ids = 164; -pub const id_core_hash_merge_ptr: ruby_method_ids = 165; -pub const id_core_hash_merge_kwd: ruby_method_ids = 166; -pub const id_core_raise: ruby_method_ids = 167; -pub const id_core_sprintf: ruby_method_ids = 168; -pub const id_debug_created_info: ruby_method_ids = 169; -pub const tPRESERVED_ID_END: ruby_method_ids = 170; -pub const tTOKEN_LOCAL_BEGIN: ruby_method_ids = 169; -pub const tMax: ruby_method_ids = 170; -pub const tMin: ruby_method_ids = 171; -pub const tFreeze: ruby_method_ids = 172; -pub const tInspect: ruby_method_ids = 173; -pub const tIntern: ruby_method_ids = 174; -pub const tObject_id: ruby_method_ids = 175; -pub const tConst_added: ruby_method_ids = 176; -pub const tConst_missing: ruby_method_ids = 177; -pub const tMethodMissing: ruby_method_ids = 178; -pub const tMethod_added: ruby_method_ids = 179; -pub const tSingleton_method_added: ruby_method_ids = 180; -pub const tMethod_removed: ruby_method_ids = 181; -pub const tSingleton_method_removed: ruby_method_ids = 182; -pub const tMethod_undefined: ruby_method_ids = 183; -pub const tSingleton_method_undefined: ruby_method_ids = 184; -pub const tLength: ruby_method_ids = 185; -pub const tSize: ruby_method_ids = 186; -pub const tGets: ruby_method_ids = 187; -pub const tSucc: ruby_method_ids = 188; -pub const tEach: ruby_method_ids = 189; -pub const tProc: ruby_method_ids = 190; -pub const tLambda: ruby_method_ids = 191; -pub const tSend: ruby_method_ids = 192; -pub const t__send__: ruby_method_ids = 193; -pub const t__attached__: ruby_method_ids = 194; -pub const t__recursive_key__: ruby_method_ids = 195; -pub const tInitialize: ruby_method_ids = 196; -pub const tInitialize_copy: ruby_method_ids = 197; -pub const 
tInitialize_clone: ruby_method_ids = 198; -pub const tInitialize_dup: ruby_method_ids = 199; -pub const tTo_int: ruby_method_ids = 200; -pub const tTo_ary: ruby_method_ids = 201; -pub const tTo_str: ruby_method_ids = 202; -pub const tTo_sym: ruby_method_ids = 203; -pub const tTo_hash: ruby_method_ids = 204; -pub const tTo_proc: ruby_method_ids = 205; -pub const tTo_io: ruby_method_ids = 206; -pub const tTo_a: ruby_method_ids = 207; -pub const tTo_s: ruby_method_ids = 208; -pub const tTo_i: ruby_method_ids = 209; -pub const tTo_f: ruby_method_ids = 210; -pub const tTo_r: ruby_method_ids = 211; -pub const tBt: ruby_method_ids = 212; -pub const tBt_locations: ruby_method_ids = 213; -pub const tCall: ruby_method_ids = 214; -pub const tMesg: ruby_method_ids = 215; -pub const tException: ruby_method_ids = 216; -pub const tLocals: ruby_method_ids = 217; -pub const tNOT: ruby_method_ids = 218; -pub const tAND: ruby_method_ids = 219; -pub const tOR: ruby_method_ids = 220; -pub const tDiv: ruby_method_ids = 221; -pub const tDivmod: ruby_method_ids = 222; -pub const tFdiv: ruby_method_ids = 223; -pub const tQuo: ruby_method_ids = 224; -pub const tName: ruby_method_ids = 225; -pub const tNil: ruby_method_ids = 226; -pub const tPath: ruby_method_ids = 227; -pub const tUScore: ruby_method_ids = 228; -pub const tNUMPARAM_1: ruby_method_ids = 229; -pub const tNUMPARAM_2: ruby_method_ids = 230; -pub const tNUMPARAM_3: ruby_method_ids = 231; -pub const tNUMPARAM_4: ruby_method_ids = 232; -pub const tNUMPARAM_5: ruby_method_ids = 233; -pub const tNUMPARAM_6: ruby_method_ids = 234; -pub const tNUMPARAM_7: ruby_method_ids = 235; -pub const tNUMPARAM_8: ruby_method_ids = 236; -pub const tNUMPARAM_9: ruby_method_ids = 237; -pub const tDefault: ruby_method_ids = 238; -pub const tTOKEN_LOCAL_END: ruby_method_ids = 239; -pub const tTOKEN_INSTANCE_BEGIN: ruby_method_ids = 238; -pub const tTOKEN_INSTANCE_END: ruby_method_ids = 239; -pub const tTOKEN_GLOBAL_BEGIN: ruby_method_ids = 238; -pub const tLASTLINE: ruby_method_ids = 239; -pub const tBACKREF: ruby_method_ids = 240; -pub const tERROR_INFO: ruby_method_ids = 241; -pub const tTOKEN_GLOBAL_END: ruby_method_ids = 242; -pub const tTOKEN_CONST_BEGIN: ruby_method_ids = 241; -pub const tTOKEN_CONST_END: ruby_method_ids = 242; -pub const tTOKEN_CLASS_BEGIN: ruby_method_ids = 241; -pub const tTOKEN_CLASS_END: ruby_method_ids = 242; -pub const tTOKEN_ATTRSET_BEGIN: ruby_method_ids = 241; -pub const tTOKEN_ATTRSET_END: ruby_method_ids = 242; -pub const tNEXT_ID: ruby_method_ids = 242; -pub const idMax: ruby_method_ids = 2721; -pub const idMin: ruby_method_ids = 2737; -pub const idFreeze: ruby_method_ids = 2753; -pub const idInspect: ruby_method_ids = 2769; -pub const idIntern: ruby_method_ids = 2785; -pub const idObject_id: ruby_method_ids = 2801; -pub const idConst_added: ruby_method_ids = 2817; -pub const idConst_missing: ruby_method_ids = 2833; -pub const idMethodMissing: ruby_method_ids = 2849; -pub const idMethod_added: ruby_method_ids = 2865; -pub const idSingleton_method_added: ruby_method_ids = 2881; -pub const idMethod_removed: ruby_method_ids = 2897; -pub const idSingleton_method_removed: ruby_method_ids = 2913; -pub const idMethod_undefined: ruby_method_ids = 2929; -pub const idSingleton_method_undefined: ruby_method_ids = 2945; -pub const idLength: ruby_method_ids = 2961; -pub const idSize: ruby_method_ids = 2977; -pub const idGets: ruby_method_ids = 2993; -pub const idSucc: ruby_method_ids = 3009; -pub const idEach: ruby_method_ids = 3025; -pub const idProc: 
ruby_method_ids = 3041; -pub const idLambda: ruby_method_ids = 3057; -pub const idSend: ruby_method_ids = 3073; -pub const id__send__: ruby_method_ids = 3089; -pub const id__attached__: ruby_method_ids = 3105; -pub const id__recursive_key__: ruby_method_ids = 3121; -pub const idInitialize: ruby_method_ids = 3137; -pub const idInitialize_copy: ruby_method_ids = 3153; -pub const idInitialize_clone: ruby_method_ids = 3169; -pub const idInitialize_dup: ruby_method_ids = 3185; -pub const idTo_int: ruby_method_ids = 3201; -pub const idTo_ary: ruby_method_ids = 3217; -pub const idTo_str: ruby_method_ids = 3233; -pub const idTo_sym: ruby_method_ids = 3249; -pub const idTo_hash: ruby_method_ids = 3265; -pub const idTo_proc: ruby_method_ids = 3281; -pub const idTo_io: ruby_method_ids = 3297; -pub const idTo_a: ruby_method_ids = 3313; -pub const idTo_s: ruby_method_ids = 3329; -pub const idTo_i: ruby_method_ids = 3345; -pub const idTo_f: ruby_method_ids = 3361; -pub const idTo_r: ruby_method_ids = 3377; -pub const idBt: ruby_method_ids = 3393; -pub const idBt_locations: ruby_method_ids = 3409; -pub const idCall: ruby_method_ids = 3425; -pub const idMesg: ruby_method_ids = 3441; -pub const idException: ruby_method_ids = 3457; -pub const idLocals: ruby_method_ids = 3473; -pub const idNOT: ruby_method_ids = 3489; -pub const idAND: ruby_method_ids = 3505; -pub const idOR: ruby_method_ids = 3521; -pub const idDiv: ruby_method_ids = 3537; -pub const idDivmod: ruby_method_ids = 3553; -pub const idFdiv: ruby_method_ids = 3569; -pub const idQuo: ruby_method_ids = 3585; -pub const idName: ruby_method_ids = 3601; -pub const idNil: ruby_method_ids = 3617; -pub const idPath: ruby_method_ids = 3633; -pub const idUScore: ruby_method_ids = 3649; -pub const idNUMPARAM_1: ruby_method_ids = 3665; -pub const idNUMPARAM_2: ruby_method_ids = 3681; -pub const idNUMPARAM_3: ruby_method_ids = 3697; -pub const idNUMPARAM_4: ruby_method_ids = 3713; -pub const idNUMPARAM_5: ruby_method_ids = 3729; -pub const idNUMPARAM_6: ruby_method_ids = 3745; -pub const idNUMPARAM_7: ruby_method_ids = 3761; -pub const idNUMPARAM_8: ruby_method_ids = 3777; -pub const idNUMPARAM_9: ruby_method_ids = 3793; -pub const idDefault: ruby_method_ids = 3809; -pub const idLASTLINE: ruby_method_ids = 3831; -pub const idBACKREF: ruby_method_ids = 3847; -pub const idERROR_INFO: ruby_method_ids = 3863; -pub const tLAST_OP_ID: ruby_method_ids = 169; -pub const idLAST_OP_ID: ruby_method_ids = 10; -pub type ruby_method_ids = u32; pub const BOP_PLUS: ruby_basic_operators = 0; pub const BOP_MINUS: ruby_basic_operators = 1; pub const BOP_MULT: ruby_basic_operators = 2; @@ -531,19 +328,23 @@ pub const BOP_NIL_P: ruby_basic_operators = 15; pub const BOP_SUCC: ruby_basic_operators = 16; pub const BOP_GT: ruby_basic_operators = 17; pub const BOP_GE: ruby_basic_operators = 18; -pub const BOP_NOT: ruby_basic_operators = 19; -pub const BOP_NEQ: ruby_basic_operators = 20; -pub const BOP_MATCH: ruby_basic_operators = 21; -pub const BOP_FREEZE: ruby_basic_operators = 22; -pub const BOP_UMINUS: ruby_basic_operators = 23; -pub const BOP_MAX: ruby_basic_operators = 24; -pub const BOP_MIN: ruby_basic_operators = 25; -pub const BOP_CALL: ruby_basic_operators = 26; -pub const BOP_AND: ruby_basic_operators = 27; -pub const BOP_OR: ruby_basic_operators = 28; -pub const BOP_CMP: ruby_basic_operators = 29; -pub const BOP_DEFAULT: ruby_basic_operators = 30; -pub const BOP_LAST_: ruby_basic_operators = 31; +pub const BOP_GTGT: ruby_basic_operators = 19; +pub const BOP_NOT: 
ruby_basic_operators = 20; +pub const BOP_NEQ: ruby_basic_operators = 21; +pub const BOP_MATCH: ruby_basic_operators = 22; +pub const BOP_FREEZE: ruby_basic_operators = 23; +pub const BOP_UMINUS: ruby_basic_operators = 24; +pub const BOP_MAX: ruby_basic_operators = 25; +pub const BOP_MIN: ruby_basic_operators = 26; +pub const BOP_HASH: ruby_basic_operators = 27; +pub const BOP_CALL: ruby_basic_operators = 28; +pub const BOP_AND: ruby_basic_operators = 29; +pub const BOP_OR: ruby_basic_operators = 30; +pub const BOP_CMP: ruby_basic_operators = 31; +pub const BOP_DEFAULT: ruby_basic_operators = 32; +pub const BOP_PACK: ruby_basic_operators = 33; +pub const BOP_INCLUDE_P: ruby_basic_operators = 34; +pub const BOP_LAST_: ruby_basic_operators = 35; pub type ruby_basic_operators = u32; pub type rb_serial_t = ::std::os::raw::c_ulonglong; pub const imemo_env: imemo_type = 0; @@ -555,11 +356,10 @@ pub const imemo_memo: imemo_type = 5; pub const imemo_ment: imemo_type = 6; pub const imemo_iseq: imemo_type = 7; pub const imemo_tmpbuf: imemo_type = 8; -pub const imemo_ast: imemo_type = 9; -pub const imemo_parser_strterm: imemo_type = 10; -pub const imemo_callinfo: imemo_type = 11; -pub const imemo_callcache: imemo_type = 12; -pub const imemo_constcache: imemo_type = 13; +pub const imemo_callinfo: imemo_type = 10; +pub const imemo_callcache: imemo_type = 11; +pub const imemo_constcache: imemo_type = 12; +pub const imemo_fields: imemo_type = 13; pub type imemo_type = u32; #[repr(C)] #[derive(Debug, Copy, Clone)] @@ -570,7 +370,7 @@ pub struct vm_ifunc_argc { #[repr(C)] pub struct vm_ifunc { pub flags: VALUE, - pub reserved: VALUE, + pub svar_lep: *mut VALUE, pub func: rb_block_call_func_t, pub data: *const ::std::os::raw::c_void, pub argc: vm_ifunc_argc, @@ -612,10 +412,11 @@ pub const VM_METHOD_TYPE_OPTIMIZED: rb_method_type_t = 9; pub const VM_METHOD_TYPE_MISSING: rb_method_type_t = 10; pub const VM_METHOD_TYPE_REFINED: rb_method_type_t = 11; pub type rb_method_type_t = u32; +pub type rb_cfunc_t = ::std::option::Option<unsafe extern "C" fn() -> VALUE>; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct rb_method_cfunc_struct { - pub func: ::std::option::Option<unsafe extern "C" fn() -> VALUE>, + pub func: rb_cfunc_t, pub invoker: ::std::option::Option< unsafe extern "C" fn( recv: VALUE, @@ -633,18 +434,25 @@ pub const OPTIMIZED_METHOD_TYPE_STRUCT_AREF: method_optimized_type = 3; pub const OPTIMIZED_METHOD_TYPE_STRUCT_ASET: method_optimized_type = 4; pub const OPTIMIZED_METHOD_TYPE__MAX: method_optimized_type = 5; pub type method_optimized_type = u32; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct rb_id_table { - _unused: [u8; 0], -} pub type rb_num_t = ::std::os::raw::c_ulong; +pub const RUBY_TAG_NONE: ruby_tag_type = 0; +pub const RUBY_TAG_RETURN: ruby_tag_type = 1; +pub const RUBY_TAG_BREAK: ruby_tag_type = 2; +pub const RUBY_TAG_NEXT: ruby_tag_type = 3; +pub const RUBY_TAG_RETRY: ruby_tag_type = 4; +pub const RUBY_TAG_REDO: ruby_tag_type = 5; +pub const RUBY_TAG_RAISE: ruby_tag_type = 6; +pub const RUBY_TAG_THROW: ruby_tag_type = 7; +pub const RUBY_TAG_FATAL: ruby_tag_type = 8; +pub const RUBY_TAG_MASK: ruby_tag_type = 15; +pub type ruby_tag_type = u32; +pub const VM_THROW_NO_ESCAPE_FLAG: ruby_vm_throw_flags = 32768; +pub const VM_THROW_STATE_MASK: ruby_vm_throw_flags = 255; +pub type ruby_vm_throw_flags = u32; #[repr(C)] pub struct iseq_inline_constant_cache_entry { pub flags: VALUE, pub value: VALUE, - pub _unused1: VALUE, - pub _unused2: VALUE, pub ic_cref: *const rb_cref_t, } 
#[repr(C)] @@ -656,7 +464,7 @@ pub struct iseq_inline_constant_cache { #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct iseq_inline_iv_cache_entry { - pub value: usize, + pub value: u64, pub iv_set_name: ID, } #[repr(C)] @@ -664,9 +472,24 @@ pub struct iseq_inline_iv_cache_entry { pub struct iseq_inline_cvar_cache_entry { pub entry: *mut rb_cvar_class_tbl_entry, } +pub const ISEQ_TYPE_TOP: rb_iseq_type = 0; +pub const ISEQ_TYPE_METHOD: rb_iseq_type = 1; +pub const ISEQ_TYPE_BLOCK: rb_iseq_type = 2; +pub const ISEQ_TYPE_CLASS: rb_iseq_type = 3; +pub const ISEQ_TYPE_RESCUE: rb_iseq_type = 4; +pub const ISEQ_TYPE_ENSURE: rb_iseq_type = 5; +pub const ISEQ_TYPE_EVAL: rb_iseq_type = 6; +pub const ISEQ_TYPE_MAIN: rb_iseq_type = 7; +pub const ISEQ_TYPE_PLAIN: rb_iseq_type = 8; +pub type rb_iseq_type = u32; +pub const BUILTIN_ATTR_LEAF: rb_builtin_attr = 1; +pub const BUILTIN_ATTR_SINGLE_NOARG_LEAF: rb_builtin_attr = 2; +pub const BUILTIN_ATTR_INLINE_BLOCK: rb_builtin_attr = 4; +pub const BUILTIN_ATTR_C_TRACE: rb_builtin_attr = 8; +pub type rb_builtin_attr = u32; #[repr(C)] #[derive(Debug, Copy, Clone)] -pub struct rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword { +pub struct rb_iseq_constant_body_rb_iseq_parameters_rb_iseq_param_keyword { pub num: ::std::os::raw::c_int, pub required_num: ::std::os::raw::c_int, pub bits_start: ::std::os::raw::c_int, @@ -768,6 +591,17 @@ impl rb_proc_t { __bindgen_bitfield_unit } } +pub const VM_CHECKMATCH_TYPE_WHEN: vm_check_match_type = 1; +pub const VM_CHECKMATCH_TYPE_CASE: vm_check_match_type = 2; +pub const VM_CHECKMATCH_TYPE_RESCUE: vm_check_match_type = 3; +pub type vm_check_match_type = u32; +pub const VM_OPT_NEWARRAY_SEND_MAX: vm_opt_newarray_send_type = 1; +pub const VM_OPT_NEWARRAY_SEND_MIN: vm_opt_newarray_send_type = 2; +pub const VM_OPT_NEWARRAY_SEND_HASH: vm_opt_newarray_send_type = 3; +pub const VM_OPT_NEWARRAY_SEND_PACK: vm_opt_newarray_send_type = 4; +pub const VM_OPT_NEWARRAY_SEND_PACK_BUFFER: vm_opt_newarray_send_type = 5; +pub const VM_OPT_NEWARRAY_SEND_INCLUDE_P: vm_opt_newarray_send_type = 6; +pub type vm_opt_newarray_send_type = u32; pub const VM_SPECIAL_OBJECT_VMCORE: vm_special_object_type = 1; pub const VM_SPECIAL_OBJECT_CBASE: vm_special_object_type = 2; pub const VM_SPECIAL_OBJECT_CONST_BASE: vm_special_object_type = 3; @@ -792,15 +626,21 @@ pub const VM_FRAME_FLAG_LAMBDA: vm_frame_env_flags = 256; pub const VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM: vm_frame_env_flags = 512; pub const VM_FRAME_FLAG_CFRAME_KW: vm_frame_env_flags = 1024; pub const VM_FRAME_FLAG_PASSED: vm_frame_env_flags = 2048; +pub const VM_FRAME_FLAG_BOX_REQUIRE: vm_frame_env_flags = 4096; pub const VM_ENV_FLAG_LOCAL: vm_frame_env_flags = 2; pub const VM_ENV_FLAG_ESCAPED: vm_frame_env_flags = 4; pub const VM_ENV_FLAG_WB_REQUIRED: vm_frame_env_flags = 8; pub const VM_ENV_FLAG_ISOLATED: vm_frame_env_flags = 16; pub type vm_frame_env_flags = u32; +pub type attr_index_t = u16; +pub type shape_id_t = u32; +pub const SHAPE_ID_HAS_IVAR_MASK: shape_id_mask = 134742014; +pub type shape_id_mask = u32; #[repr(C)] pub struct rb_cvar_class_tbl_entry { pub index: u32, pub global_cvar_state: rb_serial_t, + pub cref: *const rb_cref_t, pub class_value: VALUE, } pub const VM_CALL_ARGS_SPLAT_bit: vm_call_flag_bits = 0; @@ -808,17 +648,24 @@ pub const VM_CALL_ARGS_BLOCKARG_bit: vm_call_flag_bits = 1; pub const VM_CALL_FCALL_bit: vm_call_flag_bits = 2; pub const VM_CALL_VCALL_bit: vm_call_flag_bits = 3; pub const VM_CALL_ARGS_SIMPLE_bit: vm_call_flag_bits = 4; -pub const 
VM_CALL_BLOCKISEQ_bit: vm_call_flag_bits = 5; -pub const VM_CALL_KWARG_bit: vm_call_flag_bits = 6; -pub const VM_CALL_KW_SPLAT_bit: vm_call_flag_bits = 7; -pub const VM_CALL_TAILCALL_bit: vm_call_flag_bits = 8; -pub const VM_CALL_SUPER_bit: vm_call_flag_bits = 9; -pub const VM_CALL_ZSUPER_bit: vm_call_flag_bits = 10; -pub const VM_CALL_OPT_SEND_bit: vm_call_flag_bits = 11; -pub const VM_CALL_KW_SPLAT_MUT_bit: vm_call_flag_bits = 12; -pub const VM_CALL__END: vm_call_flag_bits = 13; +pub const VM_CALL_KWARG_bit: vm_call_flag_bits = 5; +pub const VM_CALL_KW_SPLAT_bit: vm_call_flag_bits = 6; +pub const VM_CALL_TAILCALL_bit: vm_call_flag_bits = 7; +pub const VM_CALL_SUPER_bit: vm_call_flag_bits = 8; +pub const VM_CALL_ZSUPER_bit: vm_call_flag_bits = 9; +pub const VM_CALL_OPT_SEND_bit: vm_call_flag_bits = 10; +pub const VM_CALL_KW_SPLAT_MUT_bit: vm_call_flag_bits = 11; +pub const VM_CALL_ARGS_SPLAT_MUT_bit: vm_call_flag_bits = 12; +pub const VM_CALL_FORWARDING_bit: vm_call_flag_bits = 13; +pub const VM_CALL__END: vm_call_flag_bits = 14; pub type vm_call_flag_bits = u32; #[repr(C)] +pub struct rb_callinfo_kwarg { + pub keyword_len: ::std::os::raw::c_int, + pub references: ::std::os::raw::c_int, + pub keywords: __IncompleteArrayField<VALUE>, +} +#[repr(C)] pub struct rb_callinfo { pub flags: VALUE, pub kwarg: *const rb_callinfo_kwarg, @@ -832,6 +679,8 @@ pub struct rb_call_data { pub ci: *const rb_callinfo, pub cc: *const rb_callcache, } +pub const RSTRING_CHILLED: ruby_rstring_private_flags = 49152; +pub type ruby_rstring_private_flags = u32; pub const RHASH_PASS_AS_KEYWORDS: ruby_rhash_flags = 8192; pub const RHASH_PROC_DEFAULT: ruby_rhash_flags = 16384; pub const RHASH_ST_TABLE_FLAG: ruby_rhash_flags = 32768; @@ -839,7 +688,6 @@ pub const RHASH_AR_TABLE_SIZE_MASK: ruby_rhash_flags = 983040; pub const RHASH_AR_TABLE_SIZE_SHIFT: ruby_rhash_flags = 16; pub const RHASH_AR_TABLE_BOUND_MASK: ruby_rhash_flags = 15728640; pub const RHASH_AR_TABLE_BOUND_SHIFT: ruby_rhash_flags = 20; -pub const RHASH_TRANSIENT_FLAG: ruby_rhash_flags = 16777216; pub const RHASH_LEV_SHIFT: ruby_rhash_flags = 25; pub const RHASH_LEV_MAX: ruby_rhash_flags = 127; pub type ruby_rhash_flags = u32; @@ -850,14 +698,6 @@ pub struct rb_builtin_function { pub argc: ::std::os::raw::c_int, pub index: ::std::os::raw::c_int, pub name: *const ::std::os::raw::c_char, - pub compiler: ::std::option::Option< - unsafe extern "C" fn( - arg1: VALUE, - arg2: ::std::os::raw::c_long, - arg3: ::std::os::raw::c_uint, - arg4: bool, - ), - >, } pub const YARVINSN_nop: ruby_vminsn_type = 0; pub const YARVINSN_getlocal: ruby_vminsn_type = 1; @@ -881,229 +721,326 @@ pub const YARVINSN_putself: ruby_vminsn_type = 18; pub const YARVINSN_putobject: ruby_vminsn_type = 19; pub const YARVINSN_putspecialobject: ruby_vminsn_type = 20; pub const YARVINSN_putstring: ruby_vminsn_type = 21; -pub const YARVINSN_concatstrings: ruby_vminsn_type = 22; -pub const YARVINSN_anytostring: ruby_vminsn_type = 23; -pub const YARVINSN_toregexp: ruby_vminsn_type = 24; -pub const YARVINSN_intern: ruby_vminsn_type = 25; -pub const YARVINSN_newarray: ruby_vminsn_type = 26; -pub const YARVINSN_newarraykwsplat: ruby_vminsn_type = 27; -pub const YARVINSN_duparray: ruby_vminsn_type = 28; -pub const YARVINSN_duphash: ruby_vminsn_type = 29; -pub const YARVINSN_expandarray: ruby_vminsn_type = 30; -pub const YARVINSN_concatarray: ruby_vminsn_type = 31; -pub const YARVINSN_splatarray: ruby_vminsn_type = 32; -pub const YARVINSN_newhash: ruby_vminsn_type = 33; -pub const YARVINSN_newrange: 
ruby_vminsn_type = 34; -pub const YARVINSN_pop: ruby_vminsn_type = 35; -pub const YARVINSN_dup: ruby_vminsn_type = 36; -pub const YARVINSN_dupn: ruby_vminsn_type = 37; -pub const YARVINSN_swap: ruby_vminsn_type = 38; -pub const YARVINSN_opt_reverse: ruby_vminsn_type = 39; -pub const YARVINSN_topn: ruby_vminsn_type = 40; -pub const YARVINSN_setn: ruby_vminsn_type = 41; -pub const YARVINSN_adjuststack: ruby_vminsn_type = 42; -pub const YARVINSN_defined: ruby_vminsn_type = 43; -pub const YARVINSN_checkmatch: ruby_vminsn_type = 44; -pub const YARVINSN_checkkeyword: ruby_vminsn_type = 45; -pub const YARVINSN_checktype: ruby_vminsn_type = 46; -pub const YARVINSN_defineclass: ruby_vminsn_type = 47; -pub const YARVINSN_definemethod: ruby_vminsn_type = 48; -pub const YARVINSN_definesmethod: ruby_vminsn_type = 49; -pub const YARVINSN_send: ruby_vminsn_type = 50; -pub const YARVINSN_opt_send_without_block: ruby_vminsn_type = 51; -pub const YARVINSN_objtostring: ruby_vminsn_type = 52; -pub const YARVINSN_opt_str_freeze: ruby_vminsn_type = 53; -pub const YARVINSN_opt_nil_p: ruby_vminsn_type = 54; -pub const YARVINSN_opt_str_uminus: ruby_vminsn_type = 55; -pub const YARVINSN_opt_newarray_max: ruby_vminsn_type = 56; -pub const YARVINSN_opt_newarray_min: ruby_vminsn_type = 57; -pub const YARVINSN_invokesuper: ruby_vminsn_type = 58; -pub const YARVINSN_invokeblock: ruby_vminsn_type = 59; -pub const YARVINSN_leave: ruby_vminsn_type = 60; -pub const YARVINSN_throw: ruby_vminsn_type = 61; -pub const YARVINSN_jump: ruby_vminsn_type = 62; -pub const YARVINSN_branchif: ruby_vminsn_type = 63; -pub const YARVINSN_branchunless: ruby_vminsn_type = 64; -pub const YARVINSN_branchnil: ruby_vminsn_type = 65; -pub const YARVINSN_once: ruby_vminsn_type = 66; -pub const YARVINSN_opt_case_dispatch: ruby_vminsn_type = 67; -pub const YARVINSN_opt_plus: ruby_vminsn_type = 68; -pub const YARVINSN_opt_minus: ruby_vminsn_type = 69; -pub const YARVINSN_opt_mult: ruby_vminsn_type = 70; -pub const YARVINSN_opt_div: ruby_vminsn_type = 71; -pub const YARVINSN_opt_mod: ruby_vminsn_type = 72; -pub const YARVINSN_opt_eq: ruby_vminsn_type = 73; -pub const YARVINSN_opt_neq: ruby_vminsn_type = 74; -pub const YARVINSN_opt_lt: ruby_vminsn_type = 75; -pub const YARVINSN_opt_le: ruby_vminsn_type = 76; -pub const YARVINSN_opt_gt: ruby_vminsn_type = 77; -pub const YARVINSN_opt_ge: ruby_vminsn_type = 78; -pub const YARVINSN_opt_ltlt: ruby_vminsn_type = 79; -pub const YARVINSN_opt_and: ruby_vminsn_type = 80; -pub const YARVINSN_opt_or: ruby_vminsn_type = 81; -pub const YARVINSN_opt_aref: ruby_vminsn_type = 82; -pub const YARVINSN_opt_aset: ruby_vminsn_type = 83; -pub const YARVINSN_opt_aset_with: ruby_vminsn_type = 84; -pub const YARVINSN_opt_aref_with: ruby_vminsn_type = 85; -pub const YARVINSN_opt_length: ruby_vminsn_type = 86; -pub const YARVINSN_opt_size: ruby_vminsn_type = 87; -pub const YARVINSN_opt_empty_p: ruby_vminsn_type = 88; -pub const YARVINSN_opt_succ: ruby_vminsn_type = 89; -pub const YARVINSN_opt_not: ruby_vminsn_type = 90; -pub const YARVINSN_opt_regexpmatch2: ruby_vminsn_type = 91; -pub const YARVINSN_invokebuiltin: ruby_vminsn_type = 92; -pub const YARVINSN_opt_invokebuiltin_delegate: ruby_vminsn_type = 93; -pub const YARVINSN_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 94; -pub const YARVINSN_getlocal_WC_0: ruby_vminsn_type = 95; -pub const YARVINSN_getlocal_WC_1: ruby_vminsn_type = 96; -pub const YARVINSN_setlocal_WC_0: ruby_vminsn_type = 97; -pub const YARVINSN_setlocal_WC_1: ruby_vminsn_type = 98; -pub const 
YARVINSN_putobject_INT2FIX_0_: ruby_vminsn_type = 99; -pub const YARVINSN_putobject_INT2FIX_1_: ruby_vminsn_type = 100; -pub const YARVINSN_trace_nop: ruby_vminsn_type = 101; -pub const YARVINSN_trace_getlocal: ruby_vminsn_type = 102; -pub const YARVINSN_trace_setlocal: ruby_vminsn_type = 103; -pub const YARVINSN_trace_getblockparam: ruby_vminsn_type = 104; -pub const YARVINSN_trace_setblockparam: ruby_vminsn_type = 105; -pub const YARVINSN_trace_getblockparamproxy: ruby_vminsn_type = 106; -pub const YARVINSN_trace_getspecial: ruby_vminsn_type = 107; -pub const YARVINSN_trace_setspecial: ruby_vminsn_type = 108; -pub const YARVINSN_trace_getinstancevariable: ruby_vminsn_type = 109; -pub const YARVINSN_trace_setinstancevariable: ruby_vminsn_type = 110; -pub const YARVINSN_trace_getclassvariable: ruby_vminsn_type = 111; -pub const YARVINSN_trace_setclassvariable: ruby_vminsn_type = 112; -pub const YARVINSN_trace_opt_getconstant_path: ruby_vminsn_type = 113; -pub const YARVINSN_trace_getconstant: ruby_vminsn_type = 114; -pub const YARVINSN_trace_setconstant: ruby_vminsn_type = 115; -pub const YARVINSN_trace_getglobal: ruby_vminsn_type = 116; -pub const YARVINSN_trace_setglobal: ruby_vminsn_type = 117; -pub const YARVINSN_trace_putnil: ruby_vminsn_type = 118; -pub const YARVINSN_trace_putself: ruby_vminsn_type = 119; -pub const YARVINSN_trace_putobject: ruby_vminsn_type = 120; -pub const YARVINSN_trace_putspecialobject: ruby_vminsn_type = 121; -pub const YARVINSN_trace_putstring: ruby_vminsn_type = 122; -pub const YARVINSN_trace_concatstrings: ruby_vminsn_type = 123; -pub const YARVINSN_trace_anytostring: ruby_vminsn_type = 124; -pub const YARVINSN_trace_toregexp: ruby_vminsn_type = 125; -pub const YARVINSN_trace_intern: ruby_vminsn_type = 126; -pub const YARVINSN_trace_newarray: ruby_vminsn_type = 127; -pub const YARVINSN_trace_newarraykwsplat: ruby_vminsn_type = 128; -pub const YARVINSN_trace_duparray: ruby_vminsn_type = 129; -pub const YARVINSN_trace_duphash: ruby_vminsn_type = 130; -pub const YARVINSN_trace_expandarray: ruby_vminsn_type = 131; -pub const YARVINSN_trace_concatarray: ruby_vminsn_type = 132; -pub const YARVINSN_trace_splatarray: ruby_vminsn_type = 133; -pub const YARVINSN_trace_newhash: ruby_vminsn_type = 134; -pub const YARVINSN_trace_newrange: ruby_vminsn_type = 135; -pub const YARVINSN_trace_pop: ruby_vminsn_type = 136; -pub const YARVINSN_trace_dup: ruby_vminsn_type = 137; -pub const YARVINSN_trace_dupn: ruby_vminsn_type = 138; -pub const YARVINSN_trace_swap: ruby_vminsn_type = 139; -pub const YARVINSN_trace_opt_reverse: ruby_vminsn_type = 140; -pub const YARVINSN_trace_topn: ruby_vminsn_type = 141; -pub const YARVINSN_trace_setn: ruby_vminsn_type = 142; -pub const YARVINSN_trace_adjuststack: ruby_vminsn_type = 143; -pub const YARVINSN_trace_defined: ruby_vminsn_type = 144; -pub const YARVINSN_trace_checkmatch: ruby_vminsn_type = 145; -pub const YARVINSN_trace_checkkeyword: ruby_vminsn_type = 146; -pub const YARVINSN_trace_checktype: ruby_vminsn_type = 147; -pub const YARVINSN_trace_defineclass: ruby_vminsn_type = 148; -pub const YARVINSN_trace_definemethod: ruby_vminsn_type = 149; -pub const YARVINSN_trace_definesmethod: ruby_vminsn_type = 150; -pub const YARVINSN_trace_send: ruby_vminsn_type = 151; -pub const YARVINSN_trace_opt_send_without_block: ruby_vminsn_type = 152; -pub const YARVINSN_trace_objtostring: ruby_vminsn_type = 153; -pub const YARVINSN_trace_opt_str_freeze: ruby_vminsn_type = 154; -pub const YARVINSN_trace_opt_nil_p: ruby_vminsn_type = 155; -pub const 
YARVINSN_trace_opt_str_uminus: ruby_vminsn_type = 156; -pub const YARVINSN_trace_opt_newarray_max: ruby_vminsn_type = 157; -pub const YARVINSN_trace_opt_newarray_min: ruby_vminsn_type = 158; -pub const YARVINSN_trace_invokesuper: ruby_vminsn_type = 159; -pub const YARVINSN_trace_invokeblock: ruby_vminsn_type = 160; -pub const YARVINSN_trace_leave: ruby_vminsn_type = 161; -pub const YARVINSN_trace_throw: ruby_vminsn_type = 162; -pub const YARVINSN_trace_jump: ruby_vminsn_type = 163; -pub const YARVINSN_trace_branchif: ruby_vminsn_type = 164; -pub const YARVINSN_trace_branchunless: ruby_vminsn_type = 165; -pub const YARVINSN_trace_branchnil: ruby_vminsn_type = 166; -pub const YARVINSN_trace_once: ruby_vminsn_type = 167; -pub const YARVINSN_trace_opt_case_dispatch: ruby_vminsn_type = 168; -pub const YARVINSN_trace_opt_plus: ruby_vminsn_type = 169; -pub const YARVINSN_trace_opt_minus: ruby_vminsn_type = 170; -pub const YARVINSN_trace_opt_mult: ruby_vminsn_type = 171; -pub const YARVINSN_trace_opt_div: ruby_vminsn_type = 172; -pub const YARVINSN_trace_opt_mod: ruby_vminsn_type = 173; -pub const YARVINSN_trace_opt_eq: ruby_vminsn_type = 174; -pub const YARVINSN_trace_opt_neq: ruby_vminsn_type = 175; -pub const YARVINSN_trace_opt_lt: ruby_vminsn_type = 176; -pub const YARVINSN_trace_opt_le: ruby_vminsn_type = 177; -pub const YARVINSN_trace_opt_gt: ruby_vminsn_type = 178; -pub const YARVINSN_trace_opt_ge: ruby_vminsn_type = 179; -pub const YARVINSN_trace_opt_ltlt: ruby_vminsn_type = 180; -pub const YARVINSN_trace_opt_and: ruby_vminsn_type = 181; -pub const YARVINSN_trace_opt_or: ruby_vminsn_type = 182; -pub const YARVINSN_trace_opt_aref: ruby_vminsn_type = 183; -pub const YARVINSN_trace_opt_aset: ruby_vminsn_type = 184; -pub const YARVINSN_trace_opt_aset_with: ruby_vminsn_type = 185; -pub const YARVINSN_trace_opt_aref_with: ruby_vminsn_type = 186; -pub const YARVINSN_trace_opt_length: ruby_vminsn_type = 187; -pub const YARVINSN_trace_opt_size: ruby_vminsn_type = 188; -pub const YARVINSN_trace_opt_empty_p: ruby_vminsn_type = 189; -pub const YARVINSN_trace_opt_succ: ruby_vminsn_type = 190; -pub const YARVINSN_trace_opt_not: ruby_vminsn_type = 191; -pub const YARVINSN_trace_opt_regexpmatch2: ruby_vminsn_type = 192; -pub const YARVINSN_trace_invokebuiltin: ruby_vminsn_type = 193; -pub const YARVINSN_trace_opt_invokebuiltin_delegate: ruby_vminsn_type = 194; -pub const YARVINSN_trace_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 195; -pub const YARVINSN_trace_getlocal_WC_0: ruby_vminsn_type = 196; -pub const YARVINSN_trace_getlocal_WC_1: ruby_vminsn_type = 197; -pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 198; -pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 199; -pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 200; -pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 201; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 202; +pub const YARVINSN_putchilledstring: ruby_vminsn_type = 22; +pub const YARVINSN_concatstrings: ruby_vminsn_type = 23; +pub const YARVINSN_anytostring: ruby_vminsn_type = 24; +pub const YARVINSN_toregexp: ruby_vminsn_type = 25; +pub const YARVINSN_intern: ruby_vminsn_type = 26; +pub const YARVINSN_newarray: ruby_vminsn_type = 27; +pub const YARVINSN_pushtoarraykwsplat: ruby_vminsn_type = 28; +pub const YARVINSN_duparray: ruby_vminsn_type = 29; +pub const YARVINSN_duphash: ruby_vminsn_type = 30; +pub const YARVINSN_expandarray: ruby_vminsn_type = 31; +pub const YARVINSN_concatarray: ruby_vminsn_type = 32; +pub const 
YARVINSN_concattoarray: ruby_vminsn_type = 33; +pub const YARVINSN_pushtoarray: ruby_vminsn_type = 34; +pub const YARVINSN_splatarray: ruby_vminsn_type = 35; +pub const YARVINSN_splatkw: ruby_vminsn_type = 36; +pub const YARVINSN_newhash: ruby_vminsn_type = 37; +pub const YARVINSN_newrange: ruby_vminsn_type = 38; +pub const YARVINSN_pop: ruby_vminsn_type = 39; +pub const YARVINSN_dup: ruby_vminsn_type = 40; +pub const YARVINSN_dupn: ruby_vminsn_type = 41; +pub const YARVINSN_swap: ruby_vminsn_type = 42; +pub const YARVINSN_opt_reverse: ruby_vminsn_type = 43; +pub const YARVINSN_topn: ruby_vminsn_type = 44; +pub const YARVINSN_setn: ruby_vminsn_type = 45; +pub const YARVINSN_adjuststack: ruby_vminsn_type = 46; +pub const YARVINSN_defined: ruby_vminsn_type = 47; +pub const YARVINSN_definedivar: ruby_vminsn_type = 48; +pub const YARVINSN_checkmatch: ruby_vminsn_type = 49; +pub const YARVINSN_checkkeyword: ruby_vminsn_type = 50; +pub const YARVINSN_checktype: ruby_vminsn_type = 51; +pub const YARVINSN_defineclass: ruby_vminsn_type = 52; +pub const YARVINSN_definemethod: ruby_vminsn_type = 53; +pub const YARVINSN_definesmethod: ruby_vminsn_type = 54; +pub const YARVINSN_send: ruby_vminsn_type = 55; +pub const YARVINSN_sendforward: ruby_vminsn_type = 56; +pub const YARVINSN_opt_send_without_block: ruby_vminsn_type = 57; +pub const YARVINSN_opt_new: ruby_vminsn_type = 58; +pub const YARVINSN_objtostring: ruby_vminsn_type = 59; +pub const YARVINSN_opt_ary_freeze: ruby_vminsn_type = 60; +pub const YARVINSN_opt_hash_freeze: ruby_vminsn_type = 61; +pub const YARVINSN_opt_str_freeze: ruby_vminsn_type = 62; +pub const YARVINSN_opt_nil_p: ruby_vminsn_type = 63; +pub const YARVINSN_opt_str_uminus: ruby_vminsn_type = 64; +pub const YARVINSN_opt_duparray_send: ruby_vminsn_type = 65; +pub const YARVINSN_opt_newarray_send: ruby_vminsn_type = 66; +pub const YARVINSN_invokesuper: ruby_vminsn_type = 67; +pub const YARVINSN_invokesuperforward: ruby_vminsn_type = 68; +pub const YARVINSN_invokeblock: ruby_vminsn_type = 69; +pub const YARVINSN_leave: ruby_vminsn_type = 70; +pub const YARVINSN_throw: ruby_vminsn_type = 71; +pub const YARVINSN_jump: ruby_vminsn_type = 72; +pub const YARVINSN_branchif: ruby_vminsn_type = 73; +pub const YARVINSN_branchunless: ruby_vminsn_type = 74; +pub const YARVINSN_branchnil: ruby_vminsn_type = 75; +pub const YARVINSN_once: ruby_vminsn_type = 76; +pub const YARVINSN_opt_case_dispatch: ruby_vminsn_type = 77; +pub const YARVINSN_opt_plus: ruby_vminsn_type = 78; +pub const YARVINSN_opt_minus: ruby_vminsn_type = 79; +pub const YARVINSN_opt_mult: ruby_vminsn_type = 80; +pub const YARVINSN_opt_div: ruby_vminsn_type = 81; +pub const YARVINSN_opt_mod: ruby_vminsn_type = 82; +pub const YARVINSN_opt_eq: ruby_vminsn_type = 83; +pub const YARVINSN_opt_neq: ruby_vminsn_type = 84; +pub const YARVINSN_opt_lt: ruby_vminsn_type = 85; +pub const YARVINSN_opt_le: ruby_vminsn_type = 86; +pub const YARVINSN_opt_gt: ruby_vminsn_type = 87; +pub const YARVINSN_opt_ge: ruby_vminsn_type = 88; +pub const YARVINSN_opt_ltlt: ruby_vminsn_type = 89; +pub const YARVINSN_opt_and: ruby_vminsn_type = 90; +pub const YARVINSN_opt_or: ruby_vminsn_type = 91; +pub const YARVINSN_opt_aref: ruby_vminsn_type = 92; +pub const YARVINSN_opt_aset: ruby_vminsn_type = 93; +pub const YARVINSN_opt_length: ruby_vminsn_type = 94; +pub const YARVINSN_opt_size: ruby_vminsn_type = 95; +pub const YARVINSN_opt_empty_p: ruby_vminsn_type = 96; +pub const YARVINSN_opt_succ: ruby_vminsn_type = 97; +pub const YARVINSN_opt_not: ruby_vminsn_type 
= 98; +pub const YARVINSN_opt_regexpmatch2: ruby_vminsn_type = 99; +pub const YARVINSN_invokebuiltin: ruby_vminsn_type = 100; +pub const YARVINSN_opt_invokebuiltin_delegate: ruby_vminsn_type = 101; +pub const YARVINSN_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 102; +pub const YARVINSN_getlocal_WC_0: ruby_vminsn_type = 103; +pub const YARVINSN_getlocal_WC_1: ruby_vminsn_type = 104; +pub const YARVINSN_setlocal_WC_0: ruby_vminsn_type = 105; +pub const YARVINSN_setlocal_WC_1: ruby_vminsn_type = 106; +pub const YARVINSN_putobject_INT2FIX_0_: ruby_vminsn_type = 107; +pub const YARVINSN_putobject_INT2FIX_1_: ruby_vminsn_type = 108; +pub const YARVINSN_trace_nop: ruby_vminsn_type = 109; +pub const YARVINSN_trace_getlocal: ruby_vminsn_type = 110; +pub const YARVINSN_trace_setlocal: ruby_vminsn_type = 111; +pub const YARVINSN_trace_getblockparam: ruby_vminsn_type = 112; +pub const YARVINSN_trace_setblockparam: ruby_vminsn_type = 113; +pub const YARVINSN_trace_getblockparamproxy: ruby_vminsn_type = 114; +pub const YARVINSN_trace_getspecial: ruby_vminsn_type = 115; +pub const YARVINSN_trace_setspecial: ruby_vminsn_type = 116; +pub const YARVINSN_trace_getinstancevariable: ruby_vminsn_type = 117; +pub const YARVINSN_trace_setinstancevariable: ruby_vminsn_type = 118; +pub const YARVINSN_trace_getclassvariable: ruby_vminsn_type = 119; +pub const YARVINSN_trace_setclassvariable: ruby_vminsn_type = 120; +pub const YARVINSN_trace_opt_getconstant_path: ruby_vminsn_type = 121; +pub const YARVINSN_trace_getconstant: ruby_vminsn_type = 122; +pub const YARVINSN_trace_setconstant: ruby_vminsn_type = 123; +pub const YARVINSN_trace_getglobal: ruby_vminsn_type = 124; +pub const YARVINSN_trace_setglobal: ruby_vminsn_type = 125; +pub const YARVINSN_trace_putnil: ruby_vminsn_type = 126; +pub const YARVINSN_trace_putself: ruby_vminsn_type = 127; +pub const YARVINSN_trace_putobject: ruby_vminsn_type = 128; +pub const YARVINSN_trace_putspecialobject: ruby_vminsn_type = 129; +pub const YARVINSN_trace_putstring: ruby_vminsn_type = 130; +pub const YARVINSN_trace_putchilledstring: ruby_vminsn_type = 131; +pub const YARVINSN_trace_concatstrings: ruby_vminsn_type = 132; +pub const YARVINSN_trace_anytostring: ruby_vminsn_type = 133; +pub const YARVINSN_trace_toregexp: ruby_vminsn_type = 134; +pub const YARVINSN_trace_intern: ruby_vminsn_type = 135; +pub const YARVINSN_trace_newarray: ruby_vminsn_type = 136; +pub const YARVINSN_trace_pushtoarraykwsplat: ruby_vminsn_type = 137; +pub const YARVINSN_trace_duparray: ruby_vminsn_type = 138; +pub const YARVINSN_trace_duphash: ruby_vminsn_type = 139; +pub const YARVINSN_trace_expandarray: ruby_vminsn_type = 140; +pub const YARVINSN_trace_concatarray: ruby_vminsn_type = 141; +pub const YARVINSN_trace_concattoarray: ruby_vminsn_type = 142; +pub const YARVINSN_trace_pushtoarray: ruby_vminsn_type = 143; +pub const YARVINSN_trace_splatarray: ruby_vminsn_type = 144; +pub const YARVINSN_trace_splatkw: ruby_vminsn_type = 145; +pub const YARVINSN_trace_newhash: ruby_vminsn_type = 146; +pub const YARVINSN_trace_newrange: ruby_vminsn_type = 147; +pub const YARVINSN_trace_pop: ruby_vminsn_type = 148; +pub const YARVINSN_trace_dup: ruby_vminsn_type = 149; +pub const YARVINSN_trace_dupn: ruby_vminsn_type = 150; +pub const YARVINSN_trace_swap: ruby_vminsn_type = 151; +pub const YARVINSN_trace_opt_reverse: ruby_vminsn_type = 152; +pub const YARVINSN_trace_topn: ruby_vminsn_type = 153; +pub const YARVINSN_trace_setn: ruby_vminsn_type = 154; +pub const YARVINSN_trace_adjuststack: 
ruby_vminsn_type = 155; +pub const YARVINSN_trace_defined: ruby_vminsn_type = 156; +pub const YARVINSN_trace_definedivar: ruby_vminsn_type = 157; +pub const YARVINSN_trace_checkmatch: ruby_vminsn_type = 158; +pub const YARVINSN_trace_checkkeyword: ruby_vminsn_type = 159; +pub const YARVINSN_trace_checktype: ruby_vminsn_type = 160; +pub const YARVINSN_trace_defineclass: ruby_vminsn_type = 161; +pub const YARVINSN_trace_definemethod: ruby_vminsn_type = 162; +pub const YARVINSN_trace_definesmethod: ruby_vminsn_type = 163; +pub const YARVINSN_trace_send: ruby_vminsn_type = 164; +pub const YARVINSN_trace_sendforward: ruby_vminsn_type = 165; +pub const YARVINSN_trace_opt_send_without_block: ruby_vminsn_type = 166; +pub const YARVINSN_trace_opt_new: ruby_vminsn_type = 167; +pub const YARVINSN_trace_objtostring: ruby_vminsn_type = 168; +pub const YARVINSN_trace_opt_ary_freeze: ruby_vminsn_type = 169; +pub const YARVINSN_trace_opt_hash_freeze: ruby_vminsn_type = 170; +pub const YARVINSN_trace_opt_str_freeze: ruby_vminsn_type = 171; +pub const YARVINSN_trace_opt_nil_p: ruby_vminsn_type = 172; +pub const YARVINSN_trace_opt_str_uminus: ruby_vminsn_type = 173; +pub const YARVINSN_trace_opt_duparray_send: ruby_vminsn_type = 174; +pub const YARVINSN_trace_opt_newarray_send: ruby_vminsn_type = 175; +pub const YARVINSN_trace_invokesuper: ruby_vminsn_type = 176; +pub const YARVINSN_trace_invokesuperforward: ruby_vminsn_type = 177; +pub const YARVINSN_trace_invokeblock: ruby_vminsn_type = 178; +pub const YARVINSN_trace_leave: ruby_vminsn_type = 179; +pub const YARVINSN_trace_throw: ruby_vminsn_type = 180; +pub const YARVINSN_trace_jump: ruby_vminsn_type = 181; +pub const YARVINSN_trace_branchif: ruby_vminsn_type = 182; +pub const YARVINSN_trace_branchunless: ruby_vminsn_type = 183; +pub const YARVINSN_trace_branchnil: ruby_vminsn_type = 184; +pub const YARVINSN_trace_once: ruby_vminsn_type = 185; +pub const YARVINSN_trace_opt_case_dispatch: ruby_vminsn_type = 186; +pub const YARVINSN_trace_opt_plus: ruby_vminsn_type = 187; +pub const YARVINSN_trace_opt_minus: ruby_vminsn_type = 188; +pub const YARVINSN_trace_opt_mult: ruby_vminsn_type = 189; +pub const YARVINSN_trace_opt_div: ruby_vminsn_type = 190; +pub const YARVINSN_trace_opt_mod: ruby_vminsn_type = 191; +pub const YARVINSN_trace_opt_eq: ruby_vminsn_type = 192; +pub const YARVINSN_trace_opt_neq: ruby_vminsn_type = 193; +pub const YARVINSN_trace_opt_lt: ruby_vminsn_type = 194; +pub const YARVINSN_trace_opt_le: ruby_vminsn_type = 195; +pub const YARVINSN_trace_opt_gt: ruby_vminsn_type = 196; +pub const YARVINSN_trace_opt_ge: ruby_vminsn_type = 197; +pub const YARVINSN_trace_opt_ltlt: ruby_vminsn_type = 198; +pub const YARVINSN_trace_opt_and: ruby_vminsn_type = 199; +pub const YARVINSN_trace_opt_or: ruby_vminsn_type = 200; +pub const YARVINSN_trace_opt_aref: ruby_vminsn_type = 201; +pub const YARVINSN_trace_opt_aset: ruby_vminsn_type = 202; +pub const YARVINSN_trace_opt_length: ruby_vminsn_type = 203; +pub const YARVINSN_trace_opt_size: ruby_vminsn_type = 204; +pub const YARVINSN_trace_opt_empty_p: ruby_vminsn_type = 205; +pub const YARVINSN_trace_opt_succ: ruby_vminsn_type = 206; +pub const YARVINSN_trace_opt_not: ruby_vminsn_type = 207; +pub const YARVINSN_trace_opt_regexpmatch2: ruby_vminsn_type = 208; +pub const YARVINSN_trace_invokebuiltin: ruby_vminsn_type = 209; +pub const YARVINSN_trace_opt_invokebuiltin_delegate: ruby_vminsn_type = 210; +pub const YARVINSN_trace_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 211; +pub const 
YARVINSN_trace_getlocal_WC_0: ruby_vminsn_type = 212; +pub const YARVINSN_trace_getlocal_WC_1: ruby_vminsn_type = 213; +pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 214; +pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 215; +pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 216; +pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 217; +pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 218; +pub const YARVINSN_zjit_setinstancevariable: ruby_vminsn_type = 219; +pub const YARVINSN_zjit_definedivar: ruby_vminsn_type = 220; +pub const YARVINSN_zjit_send: ruby_vminsn_type = 221; +pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 222; +pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 223; +pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 224; +pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 225; +pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 226; +pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 227; +pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 228; +pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 229; +pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 230; +pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 231; +pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 232; +pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 233; +pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 234; +pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 235; +pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 236; +pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 237; +pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 238; +pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 239; +pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 240; +pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 241; +pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 242; +pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 243; +pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 244; +pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 245; +pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 246; +pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 247; +pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 248; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 249; pub type ruby_vminsn_type = u32; pub type rb_iseq_callback = ::std::option::Option< unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void), >; -pub const RUBY_OFFSET_RSTRING_AS_HEAP_LEN: rstring_offsets = 16; -pub const RUBY_OFFSET_RSTRING_EMBED_LEN: rstring_offsets = 16; -pub type rstring_offsets = u32; -pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword; +pub const DEFINED_NOT_DEFINED: defined_type = 0; +pub const DEFINED_NIL: defined_type = 1; +pub const DEFINED_IVAR: defined_type = 2; +pub const DEFINED_LVAR: defined_type = 3; +pub const DEFINED_GVAR: defined_type = 4; +pub const DEFINED_CVAR: defined_type = 5; +pub const DEFINED_CONST: defined_type = 6; +pub const DEFINED_METHOD: defined_type = 7; +pub const DEFINED_YIELD: defined_type = 8; +pub const DEFINED_ZSUPER: defined_type = 9; +pub const DEFINED_SELF: defined_type = 10; +pub const DEFINED_TRUE: defined_type = 11; +pub const DEFINED_FALSE: defined_type = 12; +pub const DEFINED_ASGN: defined_type = 13; +pub const DEFINED_EXPR: defined_type = 14; +pub const DEFINED_REF: defined_type = 15; +pub const DEFINED_FUNC: defined_type = 16; +pub const DEFINED_CONST_FROM: defined_type = 17; +pub 
type defined_type = u32; +pub type rb_seq_param_keyword_struct = + rb_iseq_constant_body_rb_iseq_parameters_rb_iseq_param_keyword; +pub const ROBJECT_OFFSET_AS_HEAP_FIELDS: jit_bindgen_constants = 16; +pub const ROBJECT_OFFSET_AS_ARY: jit_bindgen_constants = 16; +pub const RUBY_OFFSET_RSTRING_LEN: jit_bindgen_constants = 16; +pub const RUBY_OFFSET_EC_CFP: jit_bindgen_constants = 16; +pub const RUBY_OFFSET_EC_INTERRUPT_FLAG: jit_bindgen_constants = 32; +pub const RUBY_OFFSET_EC_INTERRUPT_MASK: jit_bindgen_constants = 36; +pub const RUBY_OFFSET_EC_THREAD_PTR: jit_bindgen_constants = 48; +pub const RUBY_OFFSET_EC_RACTOR_ID: jit_bindgen_constants = 64; +pub type jit_bindgen_constants = u32; +pub type rb_iseq_param_keyword_struct = + rb_iseq_constant_body_rb_iseq_parameters_rb_iseq_param_keyword; extern "C" { + pub fn ruby_xfree(ptr: *mut ::std::os::raw::c_void); + pub fn rb_class_attached_object(klass: VALUE) -> VALUE; pub fn rb_singleton_class(obj: VALUE) -> VALUE; pub fn rb_get_alloc_func(klass: VALUE) -> rb_alloc_func_t; pub fn rb_method_basic_definition_p(klass: VALUE, mid: ID) -> ::std::os::raw::c_int; + pub fn rb_bug(fmt: *const ::std::os::raw::c_char, ...) -> !; + pub fn rb_float_new(d: f64) -> VALUE; + pub fn rb_gc_mark(obj: VALUE); + pub fn rb_gc_mark_movable(obj: VALUE); + pub fn rb_gc_location(obj: VALUE) -> VALUE; pub fn rb_gc_writebarrier(old: VALUE, young: VALUE); pub fn rb_class_get_superclass(klass: VALUE) -> VALUE; + pub fn rb_funcall(recv: VALUE, mid: ID, n: ::std::os::raw::c_int, ...) -> VALUE; pub static mut rb_mKernel: VALUE; pub static mut rb_cBasicObject: VALUE; pub static mut rb_cArray: VALUE; + pub static mut rb_cClass: VALUE; pub static mut rb_cFalseClass: VALUE; pub static mut rb_cFloat: VALUE; pub static mut rb_cHash: VALUE; + pub static mut rb_cIO: VALUE; pub static mut rb_cInteger: VALUE; pub static mut rb_cModule: VALUE; pub static mut rb_cNilClass: VALUE; + pub static mut rb_cNumeric: VALUE; pub static mut rb_cString: VALUE; pub static mut rb_cSymbol: VALUE; pub static mut rb_cThread: VALUE; pub static mut rb_cTrueClass: VALUE; + pub fn rb_obj_class(obj: VALUE) -> VALUE; pub fn rb_ary_new_capa(capa: ::std::os::raw::c_long) -> VALUE; pub fn rb_ary_store(ary: VALUE, key: ::std::os::raw::c_long, val: VALUE); + pub fn rb_ary_dup(ary: VALUE) -> VALUE; pub fn rb_ary_resurrect(ary: VALUE) -> VALUE; + pub fn rb_ary_cat(ary: VALUE, train: *const VALUE, len: ::std::os::raw::c_long) -> VALUE; + pub fn rb_ary_push(ary: VALUE, elem: VALUE) -> VALUE; pub fn rb_ary_clear(ary: VALUE) -> VALUE; pub fn rb_hash_new() -> VALUE; pub fn rb_hash_aref(hash: VALUE, key: VALUE) -> VALUE; pub fn rb_hash_aset(hash: VALUE, key: VALUE, val: VALUE) -> VALUE; pub fn rb_hash_bulk_insert(argc: ::std::os::raw::c_long, argv: *const VALUE, hash: VALUE); + pub fn rb_obj_is_proc(recv: VALUE) -> VALUE; pub fn rb_sym2id(obj: VALUE) -> ID; pub fn rb_id2sym(id: ID) -> VALUE; pub fn rb_intern(name: *const ::std::os::raw::c_char) -> ID; - pub fn rb_gc_mark(obj: VALUE); - pub fn rb_gc_mark_movable(obj: VALUE); - pub fn rb_gc_location(obj: VALUE) -> VALUE; + pub fn rb_intern2(name: *const ::std::os::raw::c_char, len: ::std::os::raw::c_long) -> ID; + pub fn rb_id2name(id: ID) -> *const ::std::os::raw::c_char; + pub fn rb_class2name(klass: VALUE) -> *const ::std::os::raw::c_char; + pub fn rb_class_new_instance_pass_kw( + argc: ::std::os::raw::c_int, + argv: *const VALUE, + klass: VALUE, + ) -> VALUE; pub fn rb_obj_is_kind_of(obj: VALUE, klass: VALUE) -> VALUE; + pub fn rb_obj_alloc(klass: VALUE) -> VALUE; pub 
fn rb_obj_frozen_p(obj: VALUE) -> VALUE; pub fn rb_backref_get() -> VALUE; pub fn rb_range_new(beg: VALUE, end: VALUE, excl: ::std::os::raw::c_int) -> VALUE; @@ -1119,23 +1056,15 @@ extern "C" { pub fn rb_str_buf_append(dst: VALUE, src: VALUE) -> VALUE; pub fn rb_str_dup(str_: VALUE) -> VALUE; pub fn rb_str_intern(str_: VALUE) -> VALUE; + pub fn rb_mod_name(mod_: VALUE) -> VALUE; pub fn rb_ivar_get(obj: VALUE, name: ID) -> VALUE; + pub fn rb_ivar_defined(obj: VALUE, name: ID) -> VALUE; pub fn rb_attr_get(obj: VALUE, name: ID) -> VALUE; + pub fn rb_const_get(space: VALUE, name: ID) -> VALUE; pub fn rb_obj_info_dump(obj: VALUE); - pub fn rb_reg_new_ary(ary: VALUE, options: ::std::os::raw::c_int) -> VALUE; pub fn rb_class_allocate_instance(klass: VALUE) -> VALUE; - pub fn rb_obj_info(obj: VALUE) -> *const ::std::os::raw::c_char; - pub fn rb_shape_id_offset() -> i32; - pub fn rb_shape_get_shape_by_id(shape_id: shape_id_t) -> *mut rb_shape_t; - pub fn rb_shape_get_shape_id(obj: VALUE) -> shape_id_t; - pub fn rb_shape_get_iv_index(shape: *mut rb_shape_t, id: ID, value: *mut attr_index_t) -> bool; - pub fn rb_shape_obj_too_complex(obj: VALUE) -> bool; - pub fn rb_shape_transition_shape_capa( - shape: *mut rb_shape_t, - new_capacity: u32, - ) -> *mut rb_shape_t; - pub fn rb_shape_get_next(shape: *mut rb_shape_t, obj: VALUE, id: ID) -> *mut rb_shape_t; - pub fn rb_shape_id(shape: *mut rb_shape_t) -> shape_id_t; + pub fn rb_obj_equal(obj1: VALUE, obj2: VALUE) -> VALUE; + pub fn rb_reg_new_ary(ary: VALUE, options: ::std::os::raw::c_int) -> VALUE; pub fn rb_ary_tmp_new_from_values( arg1: VALUE, arg2: ::std::os::raw::c_long, @@ -1146,23 +1075,55 @@ extern "C" { n: ::std::os::raw::c_long, elts: *const VALUE, ) -> VALUE; + pub fn rb_vm_top_self() -> VALUE; + pub static mut rb_vm_insn_count: u64; pub fn rb_method_entry_at(obj: VALUE, id: ID) -> *const rb_method_entry_t; pub fn rb_callable_method_entry(klass: VALUE, id: ID) -> *const rb_callable_method_entry_t; pub fn rb_callable_method_entry_or_negative( klass: VALUE, id: ID, ) -> *const rb_callable_method_entry_t; + pub static mut rb_cRubyVM: VALUE; pub static mut rb_mRubyVMFrozenCore: VALUE; pub static mut rb_block_param_proxy: VALUE; pub fn rb_vm_ep_local_ep(ep: *const VALUE) -> *const VALUE; pub fn rb_iseq_path(iseq: *const rb_iseq_t) -> VALUE; + pub fn rb_vm_env_write(ep: *const VALUE, index: ::std::os::raw::c_int, v: VALUE); pub fn rb_vm_bh_to_procval(ec: *const rb_execution_context_t, block_handler: VALUE) -> VALUE; pub fn rb_vm_frame_method_entry( cfp: *const rb_control_frame_t, ) -> *const rb_callable_method_entry_t; + pub fn rb_obj_info(obj: VALUE) -> *const ::std::os::raw::c_char; + pub fn rb_ec_stack_check(ec: *mut rb_execution_context_struct) -> ::std::os::raw::c_int; + pub fn rb_shape_id_offset() -> i32; + pub fn rb_obj_shape_id(obj: VALUE) -> shape_id_t; + pub fn rb_shape_get_iv_index(shape_id: shape_id_t, id: ID, value: *mut attr_index_t) -> bool; + pub fn rb_shape_transition_add_ivar_no_warnings( + klass: VALUE, + original_shape_id: shape_id_t, + id: ID, + ) -> shape_id_t; + pub fn rb_ivar_get_at(obj: VALUE, index: attr_index_t, id: ID) -> VALUE; + pub fn rb_ivar_get_at_no_ractor_check(obj: VALUE, index: attr_index_t) -> VALUE; + pub fn rb_gvar_get(arg1: ID) -> VALUE; + pub fn rb_gvar_set(arg1: ID, arg2: VALUE) -> VALUE; + pub fn rb_ensure_iv_list_size(obj: VALUE, current_len: u32, newsize: u32); + pub fn rb_vm_barrier(); + pub fn rb_str_byte_substr(str_: VALUE, beg: VALUE, len: VALUE) -> VALUE; + pub fn rb_str_substr_two_fixnums( + str_: 
VALUE, + beg: VALUE, + len: VALUE, + empty: ::std::os::raw::c_int, + ) -> VALUE; pub fn rb_obj_as_string_result(str_: VALUE, obj: VALUE) -> VALUE; pub fn rb_str_concat_literals(num: usize, strary: *const VALUE) -> VALUE; - pub fn rb_ec_str_resurrect(ec: *mut rb_execution_context_struct, str_: VALUE) -> VALUE; + pub fn rb_ec_str_resurrect( + ec: *mut rb_execution_context_struct, + str_: VALUE, + chilled: bool, + ) -> VALUE; + pub fn rb_to_hash_type(obj: VALUE) -> VALUE; pub fn rb_hash_stlike_foreach( hash: VALUE, func: st_foreach_callback_func, @@ -1175,15 +1136,17 @@ extern "C" { key: st_data_t, pval: *mut st_data_t, ) -> ::std::os::raw::c_int; - pub fn rb_gvar_get(arg1: ID) -> VALUE; - pub fn rb_gvar_set(arg1: ID, arg2: VALUE) -> VALUE; - pub fn rb_ensure_iv_list_size(obj: VALUE, len: u32, newsize: u32); + pub fn rb_insn_len(insn: VALUE) -> ::std::os::raw::c_int; pub fn rb_vm_insn_decode(encoded: VALUE) -> ::std::os::raw::c_int; + pub fn rb_float_plus(x: VALUE, y: VALUE) -> VALUE; + pub fn rb_float_minus(x: VALUE, y: VALUE) -> VALUE; + pub fn rb_float_mul(x: VALUE, y: VALUE) -> VALUE; + pub fn rb_float_div(x: VALUE, y: VALUE) -> VALUE; + pub fn rb_fix_aref(fix: VALUE, idx: VALUE) -> VALUE; pub fn rb_vm_insn_addr2opcode(addr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int; pub fn rb_iseq_line_no(iseq: *const rb_iseq_t, pos: usize) -> ::std::os::raw::c_uint; pub fn rb_iseqw_to_iseq(iseqw: VALUE) -> *const rb_iseq_t; pub fn rb_iseq_label(iseq: *const rb_iseq_t) -> VALUE; - pub fn rb_vm_barrier(); pub fn rb_profile_frames( start: ::std::os::raw::c_int, limit: ::std::os::raw::c_int, @@ -1191,33 +1154,59 @@ extern "C" { lines: *mut ::std::os::raw::c_int, ) -> ::std::os::raw::c_int; pub fn rb_jit_cont_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void); - pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool; - pub fn rb_yjit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32); - pub fn rb_yjit_mark_unused(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool; - pub fn rb_yjit_icache_invalidate( - start: *mut ::std::os::raw::c_void, - end: *mut ::std::os::raw::c_void, - ); pub fn rb_yjit_exit_locations_dict( yjit_raw_samples: *mut VALUE, yjit_line_samples: *mut ::std::os::raw::c_int, samples_len: ::std::os::raw::c_int, ) -> VALUE; - pub fn rb_yjit_get_page_size() -> u32; - pub fn rb_yjit_reserve_addr_space(mem_size: u32) -> *mut u8; - pub fn rb_c_method_tracing_currently_enabled(ec: *mut rb_execution_context_t) -> bool; + pub fn rb_c_method_tracing_currently_enabled(ec: *const rb_execution_context_t) -> bool; pub fn rb_full_cfunc_return(ec: *mut rb_execution_context_t, return_value: VALUE); - pub fn rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_iseq_get_yjit_payload(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_void; pub fn rb_iseq_set_yjit_payload(iseq: *const rb_iseq_t, payload: *mut ::std::os::raw::c_void); - pub fn rb_iseq_reset_jit_func(iseq: *const rb_iseq_t); + pub fn rb_get_symbol_id(namep: VALUE) -> ID; + pub fn rb_yjit_builtin_function(iseq: *const rb_iseq_t) -> *const rb_builtin_function; + pub fn rb_yjit_str_simple_append(str1: VALUE, str2: VALUE) -> VALUE; + pub fn rb_vm_base_ptr(cfp: *mut rb_control_frame_struct) -> *mut VALUE; + pub fn rb_str_neq_internal(str1: VALUE, str2: VALUE) -> VALUE; + pub fn rb_ary_unshift_m(argc: ::std::os::raw::c_int, argv: *mut VALUE, ary: VALUE) -> VALUE; + pub fn rb_yjit_rb_ary_subseq_length(ary: VALUE, beg: 
::std::os::raw::c_long) -> VALUE; + pub fn rb_yjit_ruby2_keywords_splat_p(obj: VALUE) -> usize; + pub fn rb_yjit_splat_varg_checks( + sp: *mut VALUE, + splat_array: VALUE, + cfp: *mut rb_control_frame_t, + ) -> VALUE; + pub fn rb_yjit_splat_varg_cfunc(stack_splat_array: *mut VALUE) -> ::std::os::raw::c_int; + pub fn rb_yjit_dump_iseq_loc(iseq: *const rb_iseq_t, insn_idx: u32); + pub fn rb_yjit_iseq_inspect(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_char; + pub fn rb_RSTRUCT_SET(st: VALUE, k: ::std::os::raw::c_int, v: VALUE); + pub fn rb_ENCODING_GET(obj: VALUE) -> ::std::os::raw::c_int; + pub fn rb_yjit_constcache_shareable(ice: *const iseq_inline_constant_cache_entry) -> bool; + pub fn rb_yjit_obj_written( + old: VALUE, + young: VALUE, + file: *const ::std::os::raw::c_char, + line: ::std::os::raw::c_int, + ); + pub fn rb_object_shape_count() -> VALUE; + pub fn rb_yjit_shape_obj_too_complex_p(obj: VALUE) -> bool; + pub fn rb_yjit_shape_capacity(shape_id: shape_id_t) -> attr_index_t; + pub fn rb_yjit_shape_index(shape_id: shape_id_t) -> attr_index_t; + pub fn rb_yjit_sendish_sp_pops(ci: *const rb_callinfo) -> usize; + pub fn rb_yjit_invokeblock_sp_pops(ci: *const rb_callinfo) -> usize; + pub fn rb_yjit_cme_ractor_serial(cme: *const rb_callable_method_entry_t) -> rb_serial_t; + pub fn rb_yjit_set_exception_return( + cfp: *mut rb_control_frame_t, + leave_exit: *mut ::std::os::raw::c_void, + leave_exception: *mut ::std::os::raw::c_void, + ); + pub fn rb_vm_instruction_size() -> u32; + pub fn rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_iseq_pc_at_idx(iseq: *const rb_iseq_t, insn_idx: u32) -> *mut VALUE; pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int; pub fn rb_RSTRING_LEN(str_: VALUE) -> ::std::os::raw::c_ulong; pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char; - pub fn rb_yjit_get_proc_ptr(procv: VALUE) -> *mut rb_proc_t; pub fn rb_insn_name(insn: VALUE) -> *const ::std::os::raw::c_char; - pub fn rb_insn_len(insn: VALUE) -> ::std::os::raw::c_int; pub fn rb_vm_ci_argc(ci: *const rb_callinfo) -> ::std::os::raw::c_uint; pub fn rb_vm_ci_mid(ci: *const rb_callinfo) -> ID; pub fn rb_vm_ci_flag(ci: *const rb_callinfo) -> ::std::os::raw::c_uint; @@ -1230,7 +1219,6 @@ extern "C" { pub fn rb_METHOD_ENTRY_VISI(me: *const rb_callable_method_entry_t) -> rb_method_visibility_t; pub fn rb_get_cme_def_type(cme: *const rb_callable_method_entry_t) -> rb_method_type_t; pub fn rb_get_cme_def_body_attr_id(cme: *const rb_callable_method_entry_t) -> ID; - pub fn rb_get_symbol_id(namep: VALUE) -> ID; pub fn rb_get_cme_def_body_optimized_type( cme: *const rb_callable_method_entry_t, ) -> method_optimized_type; @@ -1242,85 +1230,93 @@ extern "C" { ) -> *mut rb_method_cfunc_t; pub fn rb_get_def_method_serial(def: *const rb_method_definition_t) -> usize; pub fn rb_get_def_original_id(def: *const rb_method_definition_t) -> ID; + pub fn rb_get_def_bmethod_proc(def: *mut rb_method_definition_t) -> VALUE; + pub fn rb_jit_get_proc_ptr(procv: VALUE) -> *mut rb_proc_t; + pub fn rb_optimized_call( + recv: *mut VALUE, + ec: *mut rb_execution_context_t, + argc: ::std::os::raw::c_int, + argv: *mut VALUE, + kw_splat: ::std::os::raw::c_int, + block_handler: VALUE, + ) -> VALUE; + pub fn rb_jit_iseq_builtin_attrs(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_get_mct_argc(mct: *const rb_method_cfunc_t) -> ::std::os::raw::c_int; pub fn rb_get_mct_func(mct: *const rb_method_cfunc_t) -> *mut ::std::os::raw::c_void; 
pub fn rb_get_def_iseq_ptr(def: *mut rb_method_definition_t) -> *const rb_iseq_t; - pub fn rb_get_def_bmethod_proc(def: *mut rb_method_definition_t) -> VALUE; pub fn rb_get_iseq_body_local_iseq(iseq: *const rb_iseq_t) -> *const rb_iseq_t; pub fn rb_get_iseq_body_parent_iseq(iseq: *const rb_iseq_t) -> *const rb_iseq_t; pub fn rb_get_iseq_body_local_table_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_get_iseq_body_iseq_encoded(iseq: *const rb_iseq_t) -> *mut VALUE; pub fn rb_get_iseq_body_stack_max(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; + pub fn rb_get_iseq_body_type(iseq: *const rb_iseq_t) -> rb_iseq_type; pub fn rb_get_iseq_flags_has_lead(iseq: *const rb_iseq_t) -> bool; pub fn rb_get_iseq_flags_has_opt(iseq: *const rb_iseq_t) -> bool; pub fn rb_get_iseq_flags_has_kw(iseq: *const rb_iseq_t) -> bool; pub fn rb_get_iseq_flags_has_post(iseq: *const rb_iseq_t) -> bool; pub fn rb_get_iseq_flags_has_kwrest(iseq: *const rb_iseq_t) -> bool; + pub fn rb_get_iseq_flags_anon_kwrest(iseq: *const rb_iseq_t) -> bool; pub fn rb_get_iseq_flags_has_rest(iseq: *const rb_iseq_t) -> bool; pub fn rb_get_iseq_flags_ruby2_keywords(iseq: *const rb_iseq_t) -> bool; pub fn rb_get_iseq_flags_has_block(iseq: *const rb_iseq_t) -> bool; pub fn rb_get_iseq_flags_ambiguous_param0(iseq: *const rb_iseq_t) -> bool; pub fn rb_get_iseq_flags_accepts_no_kwarg(iseq: *const rb_iseq_t) -> bool; + pub fn rb_get_iseq_flags_forwardable(iseq: *const rb_iseq_t) -> bool; pub fn rb_get_iseq_body_param_keyword( iseq: *const rb_iseq_t, - ) -> *const rb_seq_param_keyword_struct; + ) -> *const rb_iseq_param_keyword_struct; pub fn rb_get_iseq_body_param_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_get_iseq_body_param_lead_num(iseq: *const rb_iseq_t) -> ::std::os::raw::c_int; pub fn rb_get_iseq_body_param_opt_num(iseq: *const rb_iseq_t) -> ::std::os::raw::c_int; pub fn rb_get_iseq_body_param_opt_table(iseq: *const rb_iseq_t) -> *const VALUE; - pub fn rb_optimized_call( - recv: *mut VALUE, - ec: *mut rb_execution_context_t, - argc: ::std::os::raw::c_int, - argv: *mut VALUE, - kw_splat: ::std::os::raw::c_int, - block_handler: VALUE, - ) -> VALUE; - pub fn rb_leaf_invokebuiltin_iseq_p(iseq: *const rb_iseq_t) -> bool; - pub fn rb_leaf_builtin_function(iseq: *const rb_iseq_t) -> *const rb_builtin_function; - pub fn rb_yjit_str_simple_append(str1: VALUE, str2: VALUE) -> VALUE; pub fn rb_get_ec_cfp(ec: *const rb_execution_context_t) -> *mut rb_control_frame_struct; + pub fn rb_get_cfp_iseq(cfp: *mut rb_control_frame_struct) -> *const rb_iseq_t; pub fn rb_get_cfp_pc(cfp: *mut rb_control_frame_struct) -> *mut VALUE; pub fn rb_get_cfp_sp(cfp: *mut rb_control_frame_struct) -> *mut VALUE; - pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE); - pub fn rb_set_cfp_sp(cfp: *mut rb_control_frame_struct, sp: *mut VALUE); - pub fn rb_cfp_get_iseq(cfp: *mut rb_control_frame_struct) -> *mut rb_iseq_t; pub fn rb_get_cfp_self(cfp: *mut rb_control_frame_struct) -> VALUE; pub fn rb_get_cfp_ep(cfp: *mut rb_control_frame_struct) -> *mut VALUE; pub fn rb_get_cfp_ep_level(cfp: *mut rb_control_frame_struct, lv: u32) -> *const VALUE; pub fn rb_yarv_class_of(obj: VALUE) -> VALUE; - pub fn rb_yarv_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE; - pub fn rb_yarv_ary_entry_internal(ary: VALUE, offset: ::std::os::raw::c_long) -> VALUE; - pub fn rb_yarv_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE; - pub fn rb_yjit_dump_iseq_loc(iseq: *const rb_iseq_t, insn_idx: u32); pub fn rb_FL_TEST(obj: VALUE, 
flags: VALUE) -> VALUE; pub fn rb_FL_TEST_RAW(obj: VALUE, flags: VALUE) -> VALUE; pub fn rb_RB_TYPE_P(obj: VALUE, t: ruby_value_type) -> bool; pub fn rb_RSTRUCT_LEN(st: VALUE) -> ::std::os::raw::c_long; - pub fn rb_RSTRUCT_SET(st: VALUE, k: ::std::os::raw::c_int, v: VALUE); pub fn rb_get_call_data_ci(cd: *const rb_call_data) -> *const rb_callinfo; pub fn rb_BASIC_OP_UNREDEFINED_P(bop: ruby_basic_operators, klass: u32) -> bool; pub fn rb_RCLASS_ORIGIN(c: VALUE) -> VALUE; - pub fn rb_ENCODING_GET(obj: VALUE) -> ::std::os::raw::c_int; - pub fn rb_yjit_multi_ractor_p() -> bool; pub fn rb_assert_iseq_handle(handle: VALUE); + pub fn rb_assert_holding_vm_lock(); pub fn rb_IMEMO_TYPE_P(imemo: VALUE, imemo_type: imemo_type) -> ::std::os::raw::c_int; pub fn rb_assert_cme_handle(handle: VALUE); - pub fn rb_yjit_for_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void); - pub fn rb_yjit_obj_written( - old: VALUE, - young: VALUE, - file: *const ::std::os::raw::c_char, - line: ::std::os::raw::c_int, - ); - pub fn rb_yjit_vm_lock_then_barrier( + pub fn rb_yarv_ary_entry_internal(ary: VALUE, offset: ::std::os::raw::c_long) -> VALUE; + pub fn rb_jit_array_len(a: VALUE) -> ::std::os::raw::c_long; + pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE); + pub fn rb_set_cfp_sp(cfp: *mut rb_control_frame_struct, sp: *mut VALUE); + pub fn rb_jit_shape_too_complex_p(shape_id: shape_id_t) -> bool; + pub fn rb_jit_multi_ractor_p() -> bool; + pub fn rb_jit_vm_lock_then_barrier( recursive_lock_level: *mut ::std::os::raw::c_uint, file: *const ::std::os::raw::c_char, line: ::std::os::raw::c_int, ); - pub fn rb_yjit_vm_unlock( + pub fn rb_jit_vm_unlock( recursive_lock_level: *mut ::std::os::raw::c_uint, file: *const ::std::os::raw::c_char, line: ::std::os::raw::c_int, ); + pub fn rb_iseq_reset_jit_func(iseq: *const rb_iseq_t); + pub fn rb_jit_get_page_size() -> u32; + pub fn rb_jit_reserve_addr_space(mem_size: u32) -> *mut u8; + pub fn rb_jit_for_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void); + pub fn rb_jit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool; + pub fn rb_jit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32); + pub fn rb_jit_mark_unused(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool; + pub fn rb_jit_icache_invalidate( + start: *mut ::std::os::raw::c_void, + end: *mut ::std::os::raw::c_void, + ); + pub fn rb_jit_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE; + pub fn rb_jit_fix_div_fix(recv: VALUE, obj: VALUE) -> VALUE; + pub fn rb_yarv_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE; + pub fn rb_jit_str_concat_codepoint(str_: VALUE, codepoint: VALUE); } diff --git a/yjit/src/disasm.rs b/yjit/src/disasm.rs index 6fcec5b580..4f85937ee9 100644 --- a/yjit/src/disasm.rs +++ b/yjit/src/disasm.rs @@ -1,16 +1,44 @@ use crate::core::*; use crate::cruby::*; use crate::yjit::yjit_enabled_p; -#[cfg(feature = "disasm")] use crate::asm::CodeBlock; -#[cfg(feature = "disasm")] use crate::codegen::CodePtr; -#[cfg(feature = "disasm")] use crate::options::DumpDisasm; -#[cfg(feature = "disasm")] use std::fmt::Write; +#[cfg_attr(not(feature = "disasm"), allow(dead_code))] +#[derive(Copy, Clone, Debug)] +pub struct TerminalColor { + pub blue_begin: &'static str, + pub blue_end: &'static str, + pub bold_begin: &'static str, + pub bold_end: &'static str, +} + +pub static TTY_TERMINAL_COLOR: TerminalColor = TerminalColor { + blue_begin: "\x1b[34m", + blue_end: "\x1b[0m", + bold_begin: "\x1b[1m", + 
bold_end: "\x1b[22m", +}; + +pub static NON_TTY_TERMINAL_COLOR: TerminalColor = TerminalColor { + blue_begin: "", + blue_end: "", + bold_begin: "", + bold_end: "", +}; + +/// Terminal escape codes for colors, font weight, etc. Only enabled if stdout is a TTY. +pub fn get_colors() -> &'static TerminalColor { + if crate::utils::stdout_supports_colors() { + &TTY_TERMINAL_COLOR + } else { + &NON_TTY_TERMINAL_COLOR + } +} + /// Primitive called in yjit.rb /// Produce a string representing the disassembly for an ISEQ #[no_mangle] @@ -23,11 +51,6 @@ pub extern "C" fn rb_yjit_disasm_iseq(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALU #[cfg(feature = "disasm")] { - // TODO: - //if unsafe { CLASS_OF(iseqw) != rb_cISeq } { - // return Qnil; - //} - if !yjit_enabled_p() { return Qnil; } @@ -37,56 +60,46 @@ pub extern "C" fn rb_yjit_disasm_iseq(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALU // This will truncate disassembly of methods with 10k+ bytecodes. // That's a good thing - this prints to console. - let out_string = disasm_iseq_insn_range(iseq, 0, 9999); + let out_string = with_vm_lock(src_loc!(), || disasm_iseq_insn_range(iseq, 0, 9999)); return rust_str_to_ruby(&out_string); } } +/// Only call while holding the VM lock. #[cfg(feature = "disasm")] -pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> String { +pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u16, end_idx: u16) -> String { let mut out = String::from(""); // Get a list of block versions generated for this iseq - let mut block_list = get_or_create_iseq_block_list(iseq); + let block_list = get_or_create_iseq_block_list(iseq); + let mut block_list: Vec<&Block> = block_list.into_iter().map(|blockref| { + // SAFETY: We have the VM lock here and all the blocks on iseqs are valid. 
+ unsafe { blockref.as_ref() } + }).collect(); // Get a list of codeblocks relevant to this iseq let global_cb = crate::codegen::CodegenGlobals::get_inline_cb(); // Sort the blocks by increasing start addresses - block_list.sort_by(|a, b| { - use std::cmp::Ordering; - - // Get the start addresses for each block - let addr_a = a.borrow().get_start_addr().unwrap().raw_ptr(); - let addr_b = b.borrow().get_start_addr().unwrap().raw_ptr(); - - if addr_a < addr_b { - Ordering::Less - } else if addr_a == addr_b { - Ordering::Equal - } else { - Ordering::Greater - } - }); + block_list.sort_by_key(|block| block.get_start_addr().as_offset()); // Compute total code size in bytes for all blocks in the function let mut total_code_size = 0; for blockref in &block_list { - total_code_size += blockref.borrow().code_size(); + total_code_size += blockref.code_size(); } writeln!(out, "NUM BLOCK VERSIONS: {}", block_list.len()).unwrap(); writeln!(out, "TOTAL INLINE CODE SIZE: {} bytes", total_code_size).unwrap(); // For each block, sorted by increasing start address - for block_idx in 0..block_list.len() { - let block = block_list[block_idx].borrow(); + for (block_idx, block) in block_list.iter().enumerate() { let blockid = block.get_blockid(); if blockid.idx >= start_idx && blockid.idx < end_idx { let end_idx = block.get_end_idx(); - let start_addr = block.get_start_addr().unwrap(); - let end_addr = block.get_end_addr().unwrap(); + let start_addr = block.get_start_addr(); + let end_addr = block.get_end_addr(); let code_size = block.code_size(); // Write some info about the current block @@ -110,9 +123,9 @@ pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> St // If this is not the last block if block_idx < block_list.len() - 1 { // Compute the size of the gap between this block and the next - let next_block = block_list[block_idx + 1].borrow(); - let next_start_addr = next_block.get_start_addr().unwrap(); - let gap_size = next_start_addr.into_usize() - end_addr.into_usize(); + let next_block = block_list[block_idx + 1]; + let next_start_addr = next_block.get_start_addr(); + let gap_size = next_start_addr.as_offset() - end_addr.as_offset(); // Log the size of the gap between the blocks if nonzero if gap_size > 0 { @@ -125,19 +138,21 @@ pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> St return out; } -#[cfg(feature = "disasm")] +/// Dump disassembly for a range in a [CodeBlock]. VM lock required.
pub fn dump_disasm_addr_range(cb: &CodeBlock, start_addr: CodePtr, end_addr: CodePtr, dump_disasm: &DumpDisasm) { - use std::fs::File; - use std::io::Write; - for (start_addr, end_addr) in cb.writable_addrs(start_addr, end_addr) { let disasm = disasm_addr_range(cb, start_addr, end_addr); if disasm.len() > 0 { match dump_disasm { DumpDisasm::Stdout => println!("{disasm}"), - DumpDisasm::File(path) => { - let mut f = File::options().create(true).append(true).open(path).unwrap(); - f.write_all(disasm.as_bytes()).unwrap(); + DumpDisasm::File(fd) => { + use std::os::unix::io::{FromRawFd, IntoRawFd}; + use std::io::Write; + + // Write with the fd opened during boot + let mut file = unsafe { std::fs::File::from_raw_fd(*fd) }; + file.write_all(disasm.as_bytes()).unwrap(); + let _ = file.into_raw_fd(); // keep the fd open } }; } @@ -171,75 +186,181 @@ pub fn disasm_addr_range(cb: &CodeBlock, start_addr: usize, end_addr: usize) -> // Disassemble the instructions let code_size = end_addr - start_addr; let code_slice = unsafe { std::slice::from_raw_parts(start_addr as _, code_size) }; + // Stabilize output for cargo test + #[cfg(test)] + let start_addr = 0; let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap(); + let colors = get_colors(); - // Colorize outlined code in blue - if cb.outlined { - write!(&mut out, "\x1b[34m").unwrap(); - } // For each instruction in this block for insn in insns.as_ref() { // Comments for this block if let Some(comment_list) = cb.comments_at(insn.address() as usize) { for comment in comment_list { - writeln!(&mut out, " \x1b[1m# {comment}\x1b[22m").unwrap(); // Make comments bold + if cb.outlined { + write!(&mut out, "{}", colors.blue_begin).unwrap(); // Make outlined code blue + } + writeln!(&mut out, " {}# {comment}{}", colors.bold_begin, colors.bold_end).unwrap(); // Make comments bold } } + if cb.outlined { + write!(&mut out, "{}", colors.blue_begin).unwrap(); // Make outlined code blue + } writeln!(&mut out, " {insn}").unwrap(); - } - // Disable blue color - if cb.outlined { - write!(&mut out, "\x1b[0m").unwrap(); + if cb.outlined { + write!(&mut out, "{}", colors.blue_end).unwrap(); // Disable blue + } } return out; } +/// Fallback version without dependency on a disassembler which prints just bytes and comments. +#[cfg(not(feature = "disasm"))] +pub fn disasm_addr_range(cb: &CodeBlock, start_addr: usize, end_addr: usize) -> String { + let mut out = String::new(); + let mut line_byte_idx = 0; + const MAX_BYTES_PER_LINE: usize = 16; + let colors = get_colors(); + + for addr in start_addr..end_addr { + if let Some(comment_list) = cb.comments_at(addr) { + // Start a new line if we're in the middle of one + if line_byte_idx != 0 { + writeln!(&mut out).unwrap(); + line_byte_idx = 0; + } + for comment in comment_list { + writeln!(&mut out, " {}# {comment}{}", colors.bold_begin, colors.bold_end).unwrap(); // Make comments bold + } + } + if line_byte_idx == 0 { + write!(&mut out, " 0x{addr:x}: ").unwrap(); + } else { + write!(&mut out, " ").unwrap(); + } + let byte = unsafe { (addr as *const u8).read() }; + write!(&mut out, "{byte:02x}").unwrap(); + line_byte_idx += 1; + if line_byte_idx == MAX_BYTES_PER_LINE - 1 { + writeln!(&mut out).unwrap(); + line_byte_idx = 0; + } + } + + if !out.is_empty() { + writeln!(&mut out).unwrap(); + } + + out +} + +/// Assert that CodeBlock has the code specified with hex. In addition, if tested with +/// `cargo test --all-features`, it also checks it generates the specified disasm. +#[cfg(test)] +macro_rules! 
assert_disasm { + ($cb:expr, $hex:expr, $disasm:expr) => { + #[cfg(feature = "disasm")] + { + let disasm = disasm_addr_range( + &$cb, + $cb.get_ptr(0).raw_addr(&$cb), + $cb.get_write_ptr().raw_addr(&$cb), + ); + assert_eq!(unindent(&disasm, false), unindent(&$disasm, true)); + } + assert_eq!(format!("{:x}", $cb), $hex); + }; +} +#[cfg(test)] +pub(crate) use assert_disasm; + +/// Remove the minimum indent from every line, skipping the first line if `skip_first`. +#[cfg(all(feature = "disasm", test))] +pub fn unindent(string: &str, trim_lines: bool) -> String { + fn split_lines(string: &str) -> Vec<String> { + let mut result: Vec<String> = vec![]; + let mut buf: Vec<u8> = vec![]; + for byte in string.as_bytes().iter() { + buf.push(*byte); + if *byte == b'\n' { + result.push(String::from_utf8(buf).unwrap()); + buf = vec![]; + } + } + if !buf.is_empty() { + result.push(String::from_utf8(buf).unwrap()); + } + result + } + + // Break up a string into multiple lines + let mut lines = split_lines(string); + if trim_lines { // raw string literals come with extra lines + lines.remove(0); + lines.remove(lines.len() - 1); + } + + // Count the minimum number of spaces + let spaces = lines.iter().filter_map(|line| { + for (i, ch) in line.as_bytes().iter().enumerate() { + if *ch != b' ' { + return Some(i); + } + } + None + }).min().unwrap_or(0); + + // Join lines, removing spaces + let mut unindented: Vec<u8> = vec![]; + for line in lines.iter() { + if line.len() > spaces { + unindented.extend_from_slice(&line.as_bytes()[spaces..]); + } else { + unindented.extend_from_slice(&line.as_bytes()); + } + } + String::from_utf8(unindented).unwrap() +} + /// Primitive called in yjit.rb /// Produce a list of instructions compiled for an isew #[no_mangle] pub extern "C" fn rb_yjit_insns_compiled(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALUE) -> VALUE { - { - // TODO: - //if unsafe { CLASS_OF(iseqw) != rb_cISeq } { - // return Qnil; - //} - - if !yjit_enabled_p() { - return Qnil; - } - - // Get the iseq pointer from the wrapper - let iseq = unsafe { rb_iseqw_to_iseq(iseqw) }; + if !yjit_enabled_p() { + return Qnil; + } - // Get the list of instructions compiled - let insn_vec = insns_compiled(iseq); + // Get the iseq pointer from the wrapper + let iseq = unsafe { rb_iseqw_to_iseq(iseqw) }; - unsafe { - let insn_ary = rb_ary_new_capa((insn_vec.len() * 2) as i64); + // Get the list of instructions compiled + let insn_vec = insns_compiled(iseq); - // For each instruction compiled - for idx in 0..insn_vec.len() { - let op_name = &insn_vec[idx].0; - let insn_idx = insn_vec[idx].1; + unsafe { + let insn_ary = rb_ary_new_capa((insn_vec.len() * 2) as i64); - let op_sym = rust_str_to_sym(&op_name); + // For each instruction compiled + for idx in 0..insn_vec.len() { + let op_name = &insn_vec[idx].0; + let insn_idx = insn_vec[idx].1; - // Store the instruction index and opcode symbol - rb_ary_store( - insn_ary, - (2 * idx + 0) as i64, - VALUE::fixnum_from_usize(insn_idx as usize), - ); - rb_ary_store(insn_ary, (2 * idx + 1) as i64, op_sym); - } + let op_sym = rust_str_to_sym(&op_name); - insn_ary + // Store the instruction index and opcode symbol + rb_ary_store( + insn_ary, + (2 * idx + 0) as i64, + VALUE::fixnum_from_usize(insn_idx as usize), + ); + rb_ary_store(insn_ary, (2 * idx + 1) as i64, op_sym); } + + insn_ary } } -fn insns_compiled(iseq: IseqPtr) -> Vec<(String, u32)> { +fn insns_compiled(iseq: IseqPtr) -> Vec<(String, u16)> { let mut insn_vec = Vec::new(); // Get a list of block versions generated for this iseq @@ 
-247,16 +368,18 @@ fn insns_compiled(iseq: IseqPtr) -> Vec<(String, u32)> { // For each block associated with this iseq for blockref in &block_list { - let block = blockref.borrow(); + // SAFETY: Called as part of a Ruby method, which ensures the graph is + // well connected for the given iseq. + let block = unsafe { blockref.as_ref() }; let start_idx = block.get_blockid().idx; let end_idx = block.get_end_idx(); - assert!(end_idx <= unsafe { get_iseq_encoded_size(iseq) }); + assert!(u32::from(end_idx) <= unsafe { get_iseq_encoded_size(iseq) }); // For each YARV instruction in the block let mut insn_idx = start_idx; while insn_idx < end_idx { // Get the current pc and opcode - let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) }; + let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx.into()) }; // try_into() call below is unfortunate. Maybe pick i32 instead of usize for opcodes. let opcode: usize = unsafe { rb_iseq_opcode_at_pc(iseq, pc) } .try_into() @@ -269,7 +392,7 @@ fn insns_compiled(iseq: IseqPtr) -> Vec<(String, u32)> { insn_vec.push((op_name, insn_idx)); // Move to the next instruction - insn_idx += insn_len(opcode); + insn_idx += insn_len(opcode) as u16; } } diff --git a/yjit/src/invariants.rs b/yjit/src/invariants.rs index 734b32c464..0f22fba6b8 100644 --- a/yjit/src/invariants.rs +++ b/yjit/src/invariants.rs @@ -1,23 +1,23 @@ //! Code to track assumptions made during code generation and invalidate //! generated code if and when these assumptions are invalidated. -use crate::asm::OutlinedCb; +use crate::backend::ir::Assembler; use crate::codegen::*; use crate::core::*; use crate::cruby::*; -use crate::options::*; use crate::stats::*; use crate::utils::IntoUsize; use crate::yjit::yjit_enabled_p; use std::collections::{HashMap, HashSet}; +use std::os::raw::c_void; use std::mem; // Invariants to track: // assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS) // assume_method_lookup_stable(comptime_recv_klass, cme, jit); -// assume_single_ractor_mode(jit) -// assume_stable_global_constant_state(jit); +// assume_single_ractor_mode() +// track_stable_constant_names_assumption() /// Used to track all of the various block references that contain assumptions /// about the state of the virtual machine. @@ -30,7 +30,6 @@ pub struct Invariants { /// quick access to all of the blocks that are making this assumption when /// the operator is redefined. basic_operator_blocks: HashMap<(RedefinitionFlag, ruby_basic_operators), HashSet<BlockRef>>, - /// A map from a block to a set of classes and their associated basic /// operators that the block is assuming are not redefined. This is used for /// quick access to all of the assumptions that a block is making when it @@ -48,10 +47,23 @@ pub struct Invariants { /// a constant `A::B` is redefined, then all blocks that are assuming that /// `A` and `B` have not be redefined must be invalidated. constant_state_blocks: HashMap<ID, HashSet<BlockRef>>, - /// A map from a block to a set of IDs that it is assuming have not been /// redefined. block_constant_states: HashMap<BlockRef, HashSet<ID>>, + + /// A map from a class to a set of blocks that assume objects of the class + /// will have no singleton class. When the set is empty, it means that + /// there has been a singleton class for the class after boot, so you cannot + /// assume no singleton class going forward. + /// For now, the key can be only Array, Hash, or String. 
Consider making + /// an inverted HashMap if we start using this for user-defined classes + /// to maintain the performance of block_assumptions_free(). + no_singleton_classes: HashMap<VALUE, HashSet<BlockRef>>, + + /// A map from an ISEQ to a set of blocks that assume base pointer is equal + /// to environment pointer. When the set is empty, it means that EP has been + /// escaped in the ISEQ. + no_ep_escape_iseqs: HashMap<IseqPtr, HashSet<BlockRef>>, } /// Private singleton instance of the invariants global struct. @@ -68,6 +80,8 @@ impl Invariants { single_ractor: HashSet::new(), constant_state_blocks: HashMap::new(), block_constant_states: HashMap::new(), + no_singleton_classes: HashMap::new(), + no_ep_escape_iseqs: HashMap::new(), }); } } @@ -78,29 +92,20 @@ impl Invariants { } } -/// A public function that can be called from within the code generation -/// functions to ensure that the block being generated is invalidated when the -/// basic operator is redefined. +/// Mark the pending block as assuming that certain basic operators (e.g. Integer#==) +/// have not been redefined. +#[must_use] pub fn assume_bop_not_redefined( jit: &mut JITState, - ocb: &mut OutlinedCb, + asm: &mut Assembler, klass: RedefinitionFlag, bop: ruby_basic_operators, ) -> bool { if unsafe { BASIC_OP_UNREDEFINED_P(bop, klass) } { - jit_ensure_block_entry_exit(jit, ocb); - - let invariants = Invariants::get_instance(); - invariants - .basic_operator_blocks - .entry((klass, bop)) - .or_default() - .insert(jit.get_block()); - invariants - .block_basic_operators - .entry(jit.get_block()) - .or_default() - .insert((klass, bop)); + if jit_ensure_block_entry_exit(jit, asm).is_none() { + return false; + } + jit.bop_assumptions.push((klass, bop)); return true; } else { @@ -108,30 +113,75 @@ pub fn assume_bop_not_redefined( } } -// Remember that a block assumes that -// `rb_callable_method_entry(receiver_klass, cme->called_id) == cme` and that -// `cme` is valid. -// When either of these assumptions becomes invalid, rb_yjit_method_lookup_change() or -// rb_yjit_cme_invalidate() invalidates the block. -// -// @raise NoMemoryError -pub fn assume_method_lookup_stable( - jit: &mut JITState, - ocb: &mut OutlinedCb, +/// Track that a block is only valid when a certain basic operator has not been redefined +/// since the block's inception. +pub fn track_bop_assumption(uninit_block: BlockRef, bop: (RedefinitionFlag, ruby_basic_operators)) { + let invariants = Invariants::get_instance(); + invariants + .basic_operator_blocks + .entry(bop) + .or_default() + .insert(uninit_block); + invariants + .block_basic_operators + .entry(uninit_block) + .or_default() + .insert(bop); +} + +/// Track that a block will assume that `cme` is valid (false == METHOD_ENTRY_INVALIDATED(cme)). +/// [rb_yjit_cme_invalidate] invalidates the block when `cme` is invalidated. +pub fn track_method_lookup_stability_assumption( + uninit_block: BlockRef, callee_cme: *const rb_callable_method_entry_t, ) { - jit_ensure_block_entry_exit(jit, ocb); - - let block = jit.get_block(); - block - .borrow_mut() - .add_cme_dependency(callee_cme); - Invariants::get_instance() .cme_validity .entry(callee_cme) .or_default() - .insert(block); + .insert(uninit_block); +} + +/// Track that a block will assume that `klass` objects will have no singleton class. 
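+/// Once the set tracked for `klass` is emptied by invalidation, has_singleton_class_of()
+/// starts returning true and newly compiled blocks no longer make this assumption.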
+pub fn track_no_singleton_class_assumption(uninit_block: BlockRef, klass: VALUE) { + Invariants::get_instance() + .no_singleton_classes + .entry(klass) + .or_default() + .insert(uninit_block); +} + +/// Returns true if we've seen a singleton class of a given class since boot. +pub fn has_singleton_class_of(klass: VALUE) -> bool { + Invariants::get_instance() + .no_singleton_classes + .get(&klass) + .map_or(false, |blocks| blocks.is_empty()) +} + +/// Track that a block will assume that base pointer is equal to environment pointer. +pub fn track_no_ep_escape_assumption(uninit_block: BlockRef, iseq: IseqPtr) { + Invariants::get_instance() + .no_ep_escape_iseqs + .entry(iseq) + .or_default() + .insert(uninit_block); +} + +/// Returns true if a given ISEQ has previously escaped an environment. +pub fn iseq_escapes_ep(iseq: IseqPtr) -> bool { + Invariants::get_instance() + .no_ep_escape_iseqs + .get(&iseq) + .map_or(false, |blocks| blocks.is_empty()) +} + +/// Forget an ISEQ remembered in invariants +pub fn iseq_free_invariants(iseq: IseqPtr) { + if unsafe { INVARIANTS.is_none() } { + return; + } + Invariants::get_instance().no_ep_escape_iseqs.remove(&iseq); } // Checks rb_method_basic_definition_p and registers the current block for invalidation if method @@ -140,13 +190,13 @@ pub fn assume_method_lookup_stable( // default behavior. pub fn assume_method_basic_definition( jit: &mut JITState, - ocb: &mut OutlinedCb, + asm: &mut Assembler, klass: VALUE, mid: ID - ) -> bool { +) -> bool { if unsafe { rb_method_basic_definition_p(klass, mid) } != 0 { let cme = unsafe { rb_callable_method_entry(klass, mid) }; - assume_method_lookup_stable(jit, ocb, cme); + jit.assume_method_lookup_stable(asm, cme); true } else { false @@ -155,30 +205,34 @@ pub fn assume_method_basic_definition( /// Tracks that a block is assuming it is operating in single-ractor mode. #[must_use] -pub fn assume_single_ractor_mode(jit: &mut JITState, ocb: &mut OutlinedCb) -> bool { - if unsafe { rb_yjit_multi_ractor_p() } { +pub fn assume_single_ractor_mode(jit: &mut JITState, asm: &mut Assembler) -> bool { + if unsafe { rb_jit_multi_ractor_p() } { false } else { - jit_ensure_block_entry_exit(jit, ocb); - Invariants::get_instance() - .single_ractor - .insert(jit.get_block()); + if jit_ensure_block_entry_exit(jit, asm).is_none() { + return false; + } + jit.block_assumes_single_ractor = true; + true } } -/// Walk through the ISEQ to go from the current opt_getinlinecache to the -/// subsequent opt_setinlinecache and find all of the name components that are -/// associated with this constant (which correspond to the getconstant -/// arguments). -pub fn assume_stable_constant_names(jit: &mut JITState, ocb: &mut OutlinedCb, idlist: *const ID) { - /// Tracks that a block is assuming that the name component of a constant - /// has not changed since the last call to this function. +/// Track that the block will assume single ractor mode. +pub fn track_single_ractor_assumption(uninit_block: BlockRef) { + Invariants::get_instance() + .single_ractor + .insert(uninit_block); +} + +/// Track that a block will assume that the name components of a constant path expression +/// has not changed since the block's full initialization. 
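+/// `idlist` is a NULL-terminated list; ID!(NULL) entries (used for the `::` prefix) are skipped.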
+pub fn track_stable_constant_names_assumption(uninit_block: BlockRef, idlist: *const ID) { fn assume_stable_constant_name( - jit: &mut JITState, + uninit_block: BlockRef, id: ID, ) { - if id == idNULL as u64 { + if id == ID!(NULL) { // Used for :: prefix return; } @@ -188,10 +242,10 @@ pub fn assume_stable_constant_names(jit: &mut JITState, ocb: &mut OutlinedCb, id .constant_state_blocks .entry(id) .or_default() - .insert(jit.get_block()); + .insert(uninit_block); invariants .block_constant_states - .entry(jit.get_block()) + .entry(uninit_block) .or_default() .insert(id); } @@ -200,12 +254,9 @@ pub fn assume_stable_constant_names(jit: &mut JITState, ocb: &mut OutlinedCb, id for i in 0.. { match unsafe { *idlist.offset(i) } { 0 => break, // End of NULL terminated list - id => assume_stable_constant_name(jit, id), + id => assume_stable_constant_name(uninit_block, id), } } - - jit_ensure_block_entry_exit(jit, ocb); - } /// Called when a basic operator is redefined. Note that all the blocks assuming @@ -252,7 +303,7 @@ pub extern "C" fn rb_yjit_cme_invalidate(callee_cme: *const rb_callable_method_e }); } -/// Callback for then Ruby is about to spawn a ractor. In that case we need to +/// Callback for when Ruby is about to spawn a ractor. In that case we need to /// invalidate every block that is assuming single ractor mode. #[no_mangle] pub extern "C" fn rb_yjit_before_ractor_spawn() { @@ -282,32 +333,11 @@ pub extern "C" fn rb_yjit_constant_state_changed(id: ID) { } with_vm_lock(src_loc!(), || { - if get_option!(global_constant_state) { - // If the global-constant-state option is set, then we're going to - // invalidate every block that depends on any constant. - - Invariants::get_instance() - .constant_state_blocks - .keys() - .for_each(|id| { - if let Some(blocks) = - Invariants::get_instance().constant_state_blocks.remove(&id) - { - for block in &blocks { - invalidate_block_version(block); - incr_counter!(invalidate_constant_state_bump); - } - } - }); - } else { - // If the global-constant-state option is not set, then we're only going - // to invalidate the blocks that are associated with the given ID. - - if let Some(blocks) = Invariants::get_instance().constant_state_blocks.remove(&id) { - for block in &blocks { - invalidate_block_version(block); - incr_counter!(invalidate_constant_state_bump); - } + // Invalidate the blocks that are associated with the given ID. + if let Some(blocks) = Invariants::get_instance().constant_state_blocks.remove(&id) { + for block in &blocks { + invalidate_block_version(block); + incr_counter!(invalidate_constant_state_bump); } } }); @@ -327,7 +357,7 @@ pub extern "C" fn rb_yjit_root_mark() { // Why not let the GC move the cme keys in this table? // Because this is basically a compare_by_identity Hash. // If a key moves, we would need to reinsert it into the table so it is rehashed. - // That is tricky to do, espcially as it could trigger allocation which could + // That is tricky to do, especially as it could trigger allocation which could // trigger GC. Not sure if it is okay to trigger GC while the GC is updating // references. 
// @@ -344,21 +374,41 @@ pub extern "C" fn rb_yjit_root_mark() { } } +#[no_mangle] +pub extern "C" fn rb_yjit_root_update_references() { + if unsafe { INVARIANTS.is_none() } { + return; + } + let no_ep_escape_iseqs = &mut Invariants::get_instance().no_ep_escape_iseqs; + + // Make a copy of the table with updated ISEQ keys + let mut updated_copy = HashMap::with_capacity(no_ep_escape_iseqs.len()); + for (iseq, blocks) in mem::take(no_ep_escape_iseqs) { + let new_iseq = unsafe { rb_gc_location(iseq.into()) }.as_iseq(); + updated_copy.insert(new_iseq, blocks); + } + + *no_ep_escape_iseqs = updated_copy; +} + /// Remove all invariant assumptions made by the block by removing the block as /// as a key in all of the relevant tables. -pub fn block_assumptions_free(blockref: &BlockRef) { +/// For safety, the block has to be initialized and the vm lock must be held. +/// However, outgoing/incoming references to the block does _not_ need to be valid. +pub fn block_assumptions_free(blockref: BlockRef) { let invariants = Invariants::get_instance(); { - let block = blockref.borrow(); + // SAFETY: caller ensures that this reference is valid + let block = unsafe { blockref.as_ref() }; // For each method lookup dependency for dep in block.iter_cme_deps() { // Remove tracking for cme validity - if let Some(blockset) = invariants.cme_validity.get_mut(dep) { - blockset.remove(blockref); + if let Some(blockset) = invariants.cme_validity.get_mut(&dep) { + blockset.remove(&blockref); if blockset.is_empty() { - invariants.cme_validity.remove(dep); + invariants.cme_validity.remove(&dep); } } } @@ -411,19 +461,41 @@ pub fn block_assumptions_free(blockref: &BlockRef) { if invariants.constant_state_blocks.is_empty() { invariants.constant_state_blocks.shrink_to_fit(); } + + // Remove tracking for blocks assuming no singleton class + // NOTE: no_singleton_class has up to 3 keys (Array, Hash, or String) for now. + // This is effectively an O(1) access unless we start using it for more classes. + for (_, blocks) in invariants.no_singleton_classes.iter_mut() { + blocks.remove(&blockref); + } + + // Remove tracking for blocks assuming EP doesn't escape + let iseq = unsafe { blockref.as_ref() }.get_blockid().iseq; + if let Some(blocks) = invariants.no_ep_escape_iseqs.get_mut(&iseq) { + blocks.remove(&blockref); + } } /// Callback from the opt_setinlinecache instruction in the interpreter. /// Invalidate the block for the matching opt_getinlinecache so it could regenerate code /// using the new value in the constant cache. #[no_mangle] -pub extern "C" fn rb_yjit_constant_ic_update(iseq: *const rb_iseq_t, ic: IC, insn_idx: u32) { +pub extern "C" fn rb_yjit_constant_ic_update(iseq: *const rb_iseq_t, ic: IC, insn_idx: std::os::raw::c_uint) { // If YJIT isn't enabled, do nothing if !yjit_enabled_p() { return; } - if !unsafe { (*(*ic).entry).ic_cref }.is_null() || unsafe { rb_yjit_multi_ractor_p() } { + // Try to downcast the iseq index + let insn_idx: IseqIdx = if let Ok(idx) = insn_idx.try_into() { + idx + } else { + // The index is too large, YJIT can't possibly have code for it, + // so there is nothing to invalidate. + return; + }; + + if !unsafe { (*(*ic).entry).ic_cref }.is_null() || unsafe { rb_jit_multi_ractor_p() } { // We can't generate code in these situations, so no need to invalidate. // See gen_opt_getinlinecache. 
return; @@ -435,7 +507,7 @@ pub extern "C" fn rb_yjit_constant_ic_update(iseq: *const rb_iseq_t, ic: IC, ins // This should come from a running iseq, so direct threading translation // should have been done assert!(unsafe { FL_TEST(iseq.into(), VALUE(ISEQ_TRANSLATED)) } != VALUE(0)); - assert!(insn_idx < unsafe { get_iseq_encoded_size(iseq) }); + assert!(u32::from(insn_idx) < unsafe { get_iseq_encoded_size(iseq) }); // Ensure that the instruction the insn_idx is pointing to is in // fact a opt_getconstant_path instruction. @@ -468,6 +540,66 @@ pub extern "C" fn rb_yjit_constant_ic_update(iseq: *const rb_iseq_t, ic: IC, ins }); } +/// Invalidate blocks that assume objects of a given class will have no singleton class. +#[no_mangle] +pub extern "C" fn rb_yjit_invalidate_no_singleton_class(klass: VALUE) { + // Skip tracking singleton classes during boot. Such objects already have a singleton class + // before entering JIT code, so they get rejected when they're checked for the first time. + if unsafe { INVARIANTS.is_none() } { + return; + } + + // We apply this optimization only to Array, Hash, and String for now. + if unsafe { [rb_cArray, rb_cHash, rb_cString].contains(&klass) } { + with_vm_lock(src_loc!(), || { + let no_singleton_classes = &mut Invariants::get_instance().no_singleton_classes; + match no_singleton_classes.get_mut(&klass) { + Some(blocks) => { + // Invalidate existing blocks and let has_singleton_class_of() + // return true when they are compiled again + for block in mem::take(blocks) { + invalidate_block_version(&block); + incr_counter!(invalidate_no_singleton_class); + } + } + None => { + // Let has_singleton_class_of() return true for this class + no_singleton_classes.insert(klass, HashSet::new()); + } + } + }); + } +} + +/// Invalidate blocks for a given ISEQ that assumes environment pointer is +/// equal to base pointer. +#[no_mangle] +pub extern "C" fn rb_yjit_invalidate_ep_is_bp(iseq: IseqPtr) { + // Skip tracking EP escapes on boot. We don't need to invalidate anything during boot. + if unsafe { INVARIANTS.is_none() } { + return; + } + + with_vm_lock(src_loc!(), || { + // If an EP escape for this ISEQ is detected for the first time, invalidate all blocks + // associated to the ISEQ. + let no_ep_escape_iseqs = &mut Invariants::get_instance().no_ep_escape_iseqs; + match no_ep_escape_iseqs.get_mut(&iseq) { + Some(blocks) => { + // Invalidate existing blocks and make jit.ep_is_bp() return false + for block in mem::take(blocks) { + invalidate_block_version(&block); + incr_counter!(invalidate_ep_escape); + } + } + None => { + // Let jit.ep_is_bp() return false for this ISEQ + no_ep_escape_iseqs.insert(iseq, HashSet::new()); + } + } + }); +} + // Invalidate all generated code and patch C method return code to contain // logic for firing the c_return TracePoint event. Once rb_vm_barrier() // returns, all other ractors are pausing inside RB_VM_LOCK_ENTER(), which @@ -494,6 +626,8 @@ pub extern "C" fn rb_yjit_tracing_invalidate_all() { return; } + incr_counter!(invalidate_everything); + // Stop other ractors since we are going to patch machine code. 
with_vm_lock(src_loc!(), || { // Make it so all live block versions are no longer valid branch targets @@ -508,17 +642,18 @@ pub extern "C" fn rb_yjit_tracing_invalidate_all() { if on_stack_iseqs.contains(&iseq) { // This ISEQ is running, so we can't free blocks immediately for block in blocks { - delayed_deallocation(&block); + delayed_deallocation(block); } payload.dead_blocks.shrink_to_fit(); } else { // Safe to free dead blocks since the ISEQ isn't running + // Since we're freeing _all_ blocks, we don't need to keep the graph well formed for block in blocks { - free_block(&block); + unsafe { free_block(block, false) }; } mem::take(&mut payload.dead_blocks) - .iter() - .for_each(free_block); + .into_iter() + .for_each(|block| unsafe { free_block(block, false) }); } } @@ -528,37 +663,44 @@ pub extern "C" fn rb_yjit_tracing_invalidate_all() { let cb = CodegenGlobals::get_inline_cb(); + // Prevent on-stack frames from jumping to the caller on jit_exec_exception + extern "C" { + fn rb_yjit_cancel_jit_return(leave_exit: *mut c_void, leave_exception: *mut c_void) -> VALUE; + } + unsafe { + rb_yjit_cancel_jit_return( + CodegenGlobals::get_leave_exit_code().raw_ptr(cb) as _, + CodegenGlobals::get_leave_exception_code().raw_ptr(cb) as _, + ); + } + // Apply patches let old_pos = cb.get_write_pos(); let old_dropped_bytes = cb.has_dropped_bytes(); let mut patches = CodegenGlobals::take_global_inval_patches(); - patches.sort_by_cached_key(|patch| patch.inline_patch_pos.raw_ptr()); + patches.sort_by_cached_key(|patch| patch.inline_patch_pos.raw_ptr(cb)); let mut last_patch_end = std::ptr::null(); for patch in &patches { - assert!(last_patch_end <= patch.inline_patch_pos.raw_ptr(), "patches should not overlap"); - - let mut asm = crate::backend::ir::Assembler::new(); - asm.jmp(patch.outlined_target_pos.as_side_exit()); + let patch_pos = patch.inline_patch_pos.raw_ptr(cb); + assert!( + last_patch_end <= patch_pos, + "patches should not overlap (last_patch_end: {last_patch_end:?}, patch_pos: {patch_pos:?})", + ); cb.set_write_ptr(patch.inline_patch_pos); cb.set_dropped_bytes(false); - asm.compile(cb); - last_patch_end = cb.get_write_ptr().raw_ptr(); + cb.without_page_end_reserve(|cb| { + let mut asm = crate::backend::ir::Assembler::new_without_iseq(); + asm.jmp(patch.outlined_target_pos.as_side_exit()); + if asm.compile(cb, None).is_none() { + panic!("Failed to apply patch at {:?}", patch.inline_patch_pos); + } + }); + last_patch_end = cb.get_write_ptr().raw_ptr(cb); } cb.set_pos(old_pos); cb.set_dropped_bytes(old_dropped_bytes); - // Freeze invalidated part of the codepage. We only want to wait for - // running instances of the code to exit from now on, so we shouldn't - // change the code. There could be other ractors sleeping in - // branch_stub_hit(), for example. We could harden this by changing memory - // protection on the frozen range. - assert!( - CodegenGlobals::get_inline_frozen_bytes() <= old_pos, - "frozen bytes should increase monotonically" - ); - CodegenGlobals::set_inline_frozen_bytes(old_pos); - CodegenGlobals::get_outlined_cb() .unwrap() .mark_all_executable(); diff --git a/yjit/src/lib.rs b/yjit/src/lib.rs index ce87cc250a..f3247fbf1a 100644 --- a/yjit/src/lib.rs +++ b/yjit/src/lib.rs @@ -3,8 +3,20 @@ #![allow(clippy::too_many_arguments)] // :shrug: #![allow(clippy::identity_op)] // Sometimes we do it for style +// TODO(alan): This lint is right -- the way we use `static mut` is UB happy. 
We have many globals +// and take `&mut` frequently, sometimes with a method that easily allows calling it twice. +// +// All of our globals rely on us running single threaded, which outside of boot-time relies on the +// VM lock (which signals and waits for all other threads to pause). To fix this properly, we should +// gather up all the globals into a struct to centralize the safety reasoning. That way we can also +// check for re-entrance in one place. +// +// We're too close to release to do that, though, so disable the lint for now. +#![allow(unknown_lints)] +#![allow(static_mut_refs)] +#![warn(unknown_lints)] -mod asm; +pub mod asm; mod backend; mod codegen; mod core; @@ -16,3 +28,4 @@ mod stats; mod utils; mod yjit; mod virtualmem; +mod log; diff --git a/yjit/src/log.rs b/yjit/src/log.rs new file mode 100644 index 0000000000..c5a724f7e1 --- /dev/null +++ b/yjit/src/log.rs @@ -0,0 +1,179 @@ +use crate::core::BlockId; +use crate::cruby::*; +use crate::options::*; +use crate::yjit::yjit_enabled_p; + +use std::fmt::{Display, Formatter}; +use std::os::raw::c_long; +use crate::utils::iseq_get_location; + +type Timestamp = f64; + +#[derive(Clone, Debug)] +pub struct LogEntry { + /// The time when the block was compiled. + pub timestamp: Timestamp, + + /// The log message. + pub message: String, +} + +impl Display for LogEntry { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{:15.6}: {}", self.timestamp, self.message) + } +} + +pub type Log = CircularBuffer<LogEntry, 1024>; +static mut LOG: Option<Log> = None; + +impl Log { + pub fn init() { + unsafe { + LOG = Some(Log::new()); + } + } + + pub fn get_instance() -> &'static mut Log { + unsafe { + LOG.as_mut().unwrap() + } + } + + pub fn has_instance() -> bool { + unsafe { + LOG.as_mut().is_some() + } + } + + pub fn add_block_with_chain_depth(block_id: BlockId, chain_depth: u8) { + if !Self::has_instance() { + return; + } + + let print_log = get_option!(log); + let timestamp = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_secs_f64(); + + let location = iseq_get_location(block_id.iseq, block_id.idx); + let index = block_id.idx; + let message = if chain_depth > 0 { + format!("{} (index: {}, chain_depth: {})", location, index, chain_depth) + } else { + format!("{} (index: {})", location, index) + }; + + let entry = LogEntry { + timestamp, + message + }; + + if let Some(output) = print_log { + match output { + LogOutput::Stderr => { + eprintln!("{}", entry); + } + + LogOutput::File(fd) => { + use std::os::unix::io::{FromRawFd, IntoRawFd}; + use std::io::Write; + + // Write with the fd opened during boot + let mut file = unsafe { std::fs::File::from_raw_fd(fd) }; + writeln!(file, "{}", entry).unwrap(); + file.flush().unwrap(); + let _ = file.into_raw_fd(); // keep the fd open + } + + LogOutput::MemoryOnly => () // Don't print or write anything + } + } + + Self::get_instance().push(entry); + } +} + +pub struct CircularBuffer<T, const N: usize> { + buffer: Vec<Option<T>>, + head: usize, + tail: usize, + size: usize +} + +impl<T: Clone, const N: usize> CircularBuffer<T, N> { + pub fn new() -> Self { + Self { + buffer: vec![None; N], + head: 0, + tail: 0, + size: 0 + } + } + + pub fn push(&mut self, value: T) { + self.buffer[self.head] = Some(value); + self.head = (self.head + 1) % N; + if self.size == N { + self.tail = (self.tail + 1) % N; + } else { + self.size += 1; + } + } + + pub fn pop(&mut self) -> Option<T> { + if self.size == 0 { + return None; + } + + let value = 
self.buffer[self.tail].take(); + self.tail = (self.tail + 1) % N; + self.size -= 1; + value + } + + pub fn len(&self) -> usize { + self.size + } +} + + +//=========================================================================== + +/// Primitive called in yjit.rb +/// Check if log generation is enabled +#[no_mangle] +pub extern "C" fn rb_yjit_log_enabled_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { + if get_option!(log).is_some() { + return Qtrue; + } else { + return Qfalse; + } +} + +/// Primitive called in yjit.rb. +/// Export all YJIT log entries as a Ruby array. +#[no_mangle] +pub extern "C" fn rb_yjit_get_log(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { + with_vm_lock(src_loc!(), || rb_yjit_get_log_array()) +} + +fn rb_yjit_get_log_array() -> VALUE { + if !yjit_enabled_p() || get_option!(log).is_none() { + return Qnil; + } + + let log = Log::get_instance(); + let array = unsafe { rb_ary_new_capa(log.len() as c_long) }; + + while log.len() > 0 { + let entry = log.pop().unwrap(); + + unsafe { + let entry_array = rb_ary_new_capa(2); + rb_ary_push(entry_array, rb_float_new(entry.timestamp)); + rb_ary_push(entry_array, entry.message.into()); + rb_ary_push(array, entry_array); + } + } + + return array; +} diff --git a/yjit/src/options.rs b/yjit/src/options.rs index e720c33b0b..c87a436091 100644 --- a/yjit/src/options.rs +++ b/yjit/src/options.rs @@ -1,19 +1,40 @@ -use std::ffi::CStr; +use std::{ffi::{CStr, CString}, ptr::null, fs::File}; +use crate::{backend::current::TEMP_REGS, cruby::*, stats::Counter}; +use std::os::raw::{c_char, c_int, c_uint}; + +// Call threshold for small deployments and command-line apps +pub static SMALL_CALL_THRESHOLD: u64 = 30; + +// Call threshold for larger deployments and production-sized applications +pub static LARGE_CALL_THRESHOLD: u64 = 120; + +// Number of live ISEQs after which we consider an app to be large +pub static LARGE_ISEQ_COUNT: u64 = 40_000; + +// This option is exposed to the C side in a global variable for performance, see vm.c +// Number of method calls after which to start generating code +// Threshold==1 means compile on first execution +#[no_mangle] +pub static mut rb_yjit_call_threshold: u64 = SMALL_CALL_THRESHOLD; + +// This option is exposed to the C side in a global variable for performance, see vm.c +// Number of execution requests after which a method is no longer +// considered hot. Raising this results in more generated code. +#[no_mangle] +pub static mut rb_yjit_cold_threshold: u64 = 200_000; // Command-line options -#[derive(Clone, PartialEq, Eq, Debug)] +#[derive(Debug)] #[repr(C)] pub struct Options { - // Size of the executable memory block to allocate in bytes - // Note that the command line argument is expressed in MiB and not bytes - pub exec_mem_size: usize, - - // Number of method calls after which to start generating code - // Threshold==1 means compile on first execution - pub call_threshold: usize, + /// Soft limit of all memory used by YJIT in bytes + /// VirtualMem avoids allocating new pages if code_region_size + yjit_alloc_size + /// is larger than this threshold. Rust may still allocate memory beyond this limit. 
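+    /// The corresponding --yjit-mem-size argument is given in MiB and converted to bytes when parsed.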
+ pub mem_size: usize, - // Generate versions greedily until the limit is hit - pub greedy_versioning: bool, + /// Hard limit of the executable memory block to allocate in bytes + /// Note that the command line argument is expressed in MiB and not bytes + pub exec_mem_size: Option<usize>, // Disable the propagation of type information pub no_type_prop: bool, @@ -22,11 +43,27 @@ pub struct Options { // 1 means always create generic versions pub max_versions: usize, - // Capture and print out stats + // The number of registers allocated for stack temps + pub num_temp_regs: usize, + + // Disable Ruby builtin methods defined by `with_jit` hooks, e.g. Array#each in Ruby + pub c_builtin: bool, + + // Capture stats pub gen_stats: bool, + // Print stats on exit (when gen_stats is also true) + pub print_stats: bool, + // Trace locations of exits - pub gen_trace_exits: bool, + pub trace_exits: Option<TraceExits>, + + // how often to sample exit trace data + pub trace_exits_sample_rate: usize, + + // Whether to enable YJIT at boot. This option prevents other + // YJIT tuning options from enabling YJIT at boot. + pub disable: bool, /// Dump compiled and executed instructions for debugging pub dump_insns: bool, @@ -40,35 +77,91 @@ pub struct Options { /// Verify context objects (debug mode only) pub verify_ctx: bool, - /// Whether or not to assume a global constant state (and therefore - /// invalidating code whenever any constant changes) versus assuming - /// constant name components (and therefore invalidating code whenever a - /// matching name component changes) - pub global_constant_state: bool, + /// Enable generating frame pointers (for x86. arm64 always does this) + pub frame_pointer: bool, + + /// Run code GC when exec_mem_size is reached. + pub code_gc: bool, + + /// Enable writing /tmp/perf-{pid}.map for Linux perf + pub perf_map: Option<PerfMap>, + + // Where to store the log. `None` disables the log. + pub log: Option<LogOutput>, } // Initialize the options to default values pub static mut OPTIONS: Options = Options { - exec_mem_size: 64 * 1024 * 1024, - call_threshold: 30, - greedy_versioning: false, + mem_size: 128 * 1024 * 1024, + exec_mem_size: None, no_type_prop: false, max_versions: 4, + num_temp_regs: 5, + c_builtin: false, gen_stats: false, - gen_trace_exits: false, + trace_exits: None, + print_stats: true, + trace_exits_sample_rate: 0, + disable: false, dump_insns: false, dump_disasm: None, verify_ctx: false, - global_constant_state: false, dump_iseq_disasm: None, + frame_pointer: false, + code_gc: false, + perf_map: None, + log: None, }; -#[derive(Clone, PartialEq, Eq, Debug)] +/// YJIT option descriptions for `ruby --help`. +/// Note that --help allows only 80 characters per line, including indentation. 
80-character limit --> | +pub const YJIT_OPTIONS: &'static [(&str, &str)] = &[ + ("--yjit-mem-size=num", "Soft limit on YJIT memory usage in MiB (default: 128)."), + ("--yjit-exec-mem-size=num", "Hard limit on executable memory block in MiB."), + ("--yjit-call-threshold=num", "Number of calls to trigger JIT."), + ("--yjit-cold-threshold=num", "Global calls after which ISEQs not compiled (default: 200K)."), + ("--yjit-stats", "Enable collecting YJIT statistics."), + ("--yjit-log[=file|dir]", "Enable logging of YJIT's compilation activity."), + ("--yjit-disable", "Disable YJIT for lazily enabling it with RubyVM::YJIT.enable."), + ("--yjit-code-gc", "Run code GC when the code size reaches the limit."), + ("--yjit-perf", "Enable frame pointers and perf profiling."), + ("--yjit-trace-exits", "Record Ruby source location when exiting from generated code."), + ("--yjit-trace-exits-sample-rate=num", "Trace exit locations only every Nth occurrence."), +]; + +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum TraceExits { + // Trace all exits + All, + // Trace a specific counter + Counter(Counter), +} + +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum LogOutput { + // Dump to the log file as events occur. + File(std::os::unix::io::RawFd), + // Keep the log in memory only + MemoryOnly, + // Dump to stderr when the process exits + Stderr +} + +#[derive(Debug)] pub enum DumpDisasm { // Dump to stdout Stdout, // Dump to "yjit_{pid}.log" file under the specified directory - File(String), + File(std::os::unix::io::RawFd), +} + +/// Type of symbols to dump into /tmp/perf-{pid}.map +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum PerfMap { + // Dump ISEQ symbols + ISEQ, + // Dump YJIT codegen symbols + Codegen, } /// Macro to get an option value by name @@ -76,7 +169,12 @@ macro_rules! get_option { // Unsafe is ok here because options are initialized // once before any Ruby code executes ($option_name:ident) => { - unsafe { OPTIONS.$option_name } + { + // Make this a statement since attributes on expressions are experimental + #[allow(unused_unsafe)] + let ret = unsafe { crate::options::OPTIONS.$option_name }; + ret + } }; } pub(crate) use get_option; @@ -90,6 +188,7 @@ macro_rules! get_option_ref { }; } pub(crate) use get_option_ref; +use crate::log::Log; /// Expected to receive what comes after the third dash in "--yjit-*". /// Empty string means user passed only "--yjit". 
C code rejects when @@ -111,6 +210,20 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { match (opt_name, opt_val) { ("", "") => (), // Simply --yjit + ("mem-size", _) => match opt_val.parse::<usize>() { + Ok(n) => { + if n == 0 || n > 2 * 1024 * 1024 { + return None + } + + // Convert from MiB to bytes internally for convenience + unsafe { OPTIONS.mem_size = n * 1024 * 1024 } + } + Err(_) => { + return None; + } + }, + ("exec-mem-size", _) => match opt_val.parse::<usize>() { Ok(n) => { if n == 0 || n > 2 * 1024 * 1024 { @@ -118,7 +231,7 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { } // Convert from MiB to bytes internally for convenience - unsafe { OPTIONS.exec_mem_size = n * 1024 * 1024 } + unsafe { OPTIONS.exec_mem_size = Some(n * 1024 * 1024) } } Err(_) => { return None; @@ -126,7 +239,14 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { }, ("call-threshold", _) => match opt_val.parse() { - Ok(n) => unsafe { OPTIONS.call_threshold = n }, + Ok(n) => unsafe { rb_yjit_call_threshold = n }, + Err(_) => { + return None; + } + }, + + ("cold-threshold", _) => match opt_val.parse() { + Ok(n) => unsafe { rb_yjit_cold_threshold = n }, Err(_) => { return None; } @@ -139,27 +259,127 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { } }, - ("dump-disasm", _) => match opt_val.to_string().as_str() { - "" => unsafe { OPTIONS.dump_disasm = Some(DumpDisasm::Stdout) }, - directory => { - let pid = std::process::id(); - let path = format!("{directory}/yjit_{pid}.log"); - println!("YJIT disasm dump: {path}"); - unsafe { OPTIONS.dump_disasm = Some(DumpDisasm::File(path)) } + ("disable", "") => unsafe { + OPTIONS.disable = true; + }, + + ("temp-regs", _) => match opt_val.parse() { + Ok(n) => { + assert!(n <= TEMP_REGS.len(), "--yjit-temp-regs must be <= {}", TEMP_REGS.len()); + unsafe { OPTIONS.num_temp_regs = n } } + Err(_) => { + return None; + } + }, + + ("c-builtin", _) => unsafe { + OPTIONS.c_builtin = true; + }, + + ("code-gc", _) => unsafe { + OPTIONS.code_gc = true; + }, + + ("perf", _) => match opt_val { + "" => unsafe { + OPTIONS.frame_pointer = true; + OPTIONS.perf_map = Some(PerfMap::ISEQ); + }, + "fp" => unsafe { OPTIONS.frame_pointer = true }, + "iseq" => unsafe { OPTIONS.perf_map = Some(PerfMap::ISEQ) }, + // Accept --yjit-perf=map for backward compatibility + "codegen" | "map" => unsafe { OPTIONS.perf_map = Some(PerfMap::Codegen) }, + _ => return None, }, + ("dump-disasm", _) => { + if !cfg!(feature = "disasm") { + eprintln!("WARNING: the {} option works best when YJIT is built in dev mode, i.e. ./configure --enable-yjit=dev", opt_name); + } + + match opt_val { + "" => unsafe { OPTIONS.dump_disasm = Some(DumpDisasm::Stdout) }, + directory => { + let path = format!("{directory}/yjit_{}.log", std::process::id()); + match File::options().create(true).append(true).open(&path) { + Ok(file) => { + use std::os::unix::io::IntoRawFd; + eprintln!("YJIT disasm dump: {path}"); + unsafe { OPTIONS.dump_disasm = Some(DumpDisasm::File(file.into_raw_fd())) } + } + Err(err) => eprintln!("Failed to create {path}: {err}"), + } + } + } + }, + ("dump-iseq-disasm", _) => unsafe { + if !cfg!(feature = "disasm") { + eprintln!("WARNING: the {} option is only available when YJIT is built in dev mode, i.e. 
./configure --enable-yjit=dev", opt_name); + } + OPTIONS.dump_iseq_disasm = Some(opt_val.to_string()); }, - ("greedy-versioning", "") => unsafe { OPTIONS.greedy_versioning = true }, ("no-type-prop", "") => unsafe { OPTIONS.no_type_prop = true }, - ("stats", "") => unsafe { OPTIONS.gen_stats = true }, - ("trace-exits", "") => unsafe { OPTIONS.gen_trace_exits = true; OPTIONS.gen_stats = true }, + ("stats", _) => match opt_val { + "" => unsafe { OPTIONS.gen_stats = true }, + "quiet" => unsafe { + OPTIONS.gen_stats = true; + OPTIONS.print_stats = false; + }, + _ => { + return None; + } + }, + ("log", _) => match opt_val { + "" => unsafe { + OPTIONS.log = Some(LogOutput::Stderr); + Log::init(); + }, + "quiet" => unsafe { + OPTIONS.log = Some(LogOutput::MemoryOnly); + Log::init(); + }, + arg_value => { + let log_file_path = if std::path::Path::new(arg_value).is_dir() { + format!("{arg_value}/yjit_{}.log", std::process::id()) + } else { + arg_value.to_string() + }; + + match File::options().create(true).write(true).truncate(true).open(&log_file_path) { + Ok(file) => { + use std::os::unix::io::IntoRawFd; + eprintln!("YJIT log: {log_file_path}"); + + unsafe { OPTIONS.log = Some(LogOutput::File(file.into_raw_fd())) } + Log::init() + } + Err(err) => panic!("Failed to create {log_file_path}: {err}"), + } + } + }, + ("trace-exits", _) => unsafe { + OPTIONS.gen_stats = true; + OPTIONS.trace_exits = match opt_val { + "" => Some(TraceExits::All), + name => match Counter::get(name) { + Some(counter) => Some(TraceExits::Counter(counter)), + None => return None, + }, + }; + }, + ("trace-exits-sample-rate", sample_rate) => unsafe { + OPTIONS.gen_stats = true; + if OPTIONS.trace_exits.is_none() { + OPTIONS.trace_exits = Some(TraceExits::All); + } + OPTIONS.trace_exits_sample_rate = sample_rate.parse().unwrap(); + }, ("dump-insns", "") => unsafe { OPTIONS.dump_insns = true }, ("verify-ctx", "") => unsafe { OPTIONS.verify_ctx = true }, - ("global-constant-state", "") => unsafe { OPTIONS.global_constant_state = true }, // Option name not recognized _ => { @@ -167,8 +387,46 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { } } + // before we continue, check that sample_rate is either 0 or a prime number + let trace_sample_rate = unsafe { OPTIONS.trace_exits_sample_rate }; + if trace_sample_rate > 1 { + let mut i = 2; + while i*i <= trace_sample_rate { + if trace_sample_rate % i == 0 { + println!("Warning: using a non-prime number as your sampling rate can result in less accurate sampling data"); + return Some(()); + } + i += 1; + } + } + // dbg!(unsafe {OPTIONS}); // Option successfully parsed return Some(()); } + +/// Print YJIT options for `ruby --help`. `width` is width of option parts, and +/// `columns` is indent width of descriptions. 
+#[no_mangle] +pub extern "C" fn rb_yjit_show_usage(help: c_int, highlight: c_int, width: c_uint, columns: c_int) { + for &(name, description) in YJIT_OPTIONS.iter() { + extern "C" { + fn ruby_show_usage_line(name: *const c_char, secondary: *const c_char, description: *const c_char, + help: c_int, highlight: c_int, width: c_uint, columns: c_int); + } + let name = CString::new(name).unwrap(); + let description = CString::new(description).unwrap(); + unsafe { ruby_show_usage_line(name.as_ptr(), null(), description.as_ptr(), help, highlight, width, columns) } + } +} + +/// Return true if --yjit-c-builtin is given +#[no_mangle] +pub extern "C" fn rb_yjit_c_builtin_p(_ec: EcPtr, _self: VALUE) -> VALUE { + if get_option!(c_builtin) { + Qtrue + } else { + Qfalse + } +} diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs index de1310d78b..105def2fff 100644 --- a/yjit/src/stats.rs +++ b/yjit/src/stats.rs @@ -1,20 +1,99 @@ //! Everything related to the collection of runtime stats in YJIT -//! See the stats feature and the --yjit-stats command-line option +//! See the --yjit-stats command-line option -#![allow(dead_code)] // Counters are only used with the stats features +use std::ptr::addr_of_mut; +use std::sync::atomic::Ordering; +use std::time::Instant; +use std::collections::HashMap; use crate::codegen::CodegenGlobals; use crate::cruby::*; use crate::options::*; -use crate::yjit::yjit_enabled_p; +use crate::yjit::{yjit_enabled_p, YJIT_INIT_TIME}; -// stats_alloc is a middleware to instrument global allocations in Rust. -#[cfg(feature="stats")] -#[global_allocator] -static GLOBAL_ALLOCATOR: &stats_alloc::StatsAlloc<std::alloc::System> = &stats_alloc::INSTRUMENTED_SYSTEM; +#[cfg(feature = "stats_allocator")] +#[path = "../../jit/src/lib.rs"] +mod jit; -// YJIT exit counts for each instruction type -const VM_INSTRUCTION_SIZE_USIZE:usize = VM_INSTRUCTION_SIZE as usize; +/// Running total of how many ISeqs are in the system. +#[no_mangle] +pub static mut rb_yjit_live_iseq_count: u64 = 0; + +/// Monotonically increasing total of how many ISEQs were allocated +#[no_mangle] +pub static mut rb_yjit_iseq_alloc_count: u64 = 0; + +/// The number of bytes YJIT has allocated on the Rust heap. +pub fn yjit_alloc_size() -> usize { + jit::GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst) +} + +/// Mapping of C function / ISEQ name to integer indices +/// This is accessed at compilation time only (protected by a lock) +static mut CFUNC_NAME_TO_IDX: Option<HashMap<String, usize>> = None; +static mut ISEQ_NAME_TO_IDX: Option<HashMap<String, usize>> = None; + +/// Vector of call counts for each C function / ISEQ index +/// This is modified (but not resized) by JITted code +static mut CFUNC_CALL_COUNT: Option<Vec<u64>> = None; +static mut ISEQ_CALL_COUNT: Option<Vec<u64>> = None; + +/// Assign an index to a given cfunc name string +pub fn get_cfunc_idx(name: &str) -> usize { + // SAFETY: We acquire a VM lock and don't create multiple &mut references to these static mut variables. + unsafe { get_method_idx(name, &mut *addr_of_mut!(CFUNC_NAME_TO_IDX), &mut *addr_of_mut!(CFUNC_CALL_COUNT)) } +} + +/// Assign an index to a given ISEQ name string +pub fn get_iseq_idx(name: &str) -> usize { + // SAFETY: We acquire a VM lock and don't create multiple &mut references to these static mut variables. 
+ unsafe { get_method_idx(name, &mut *addr_of_mut!(ISEQ_NAME_TO_IDX), &mut *addr_of_mut!(ISEQ_CALL_COUNT)) } +} + +fn get_method_idx( + name: &str, + method_name_to_idx: &mut Option<HashMap<String, usize>>, + method_call_count: &mut Option<Vec<u64>>, +) -> usize { + //println!("{}", name); + + let name_to_idx = method_name_to_idx.get_or_insert_with(HashMap::default); + let call_count = method_call_count.get_or_insert_with(Vec::default); + + match name_to_idx.get(name) { + Some(idx) => *idx, + None => { + let idx = name_to_idx.len(); + name_to_idx.insert(name.to_string(), idx); + + // Resize the call count vector + if idx >= call_count.len() { + call_count.resize(idx + 1, 0); + } + + idx + } + } +} + +// Increment the counter for a C function +pub extern "C" fn incr_cfunc_counter(idx: usize) { + let cfunc_call_count = unsafe { CFUNC_CALL_COUNT.as_mut().unwrap() }; + assert!(idx < cfunc_call_count.len()); + cfunc_call_count[idx] += 1; +} + +// Increment the counter for an ISEQ +pub extern "C" fn incr_iseq_counter(idx: usize) { + let iseq_call_count = unsafe { ISEQ_CALL_COUNT.as_mut().unwrap() }; + assert!(idx < iseq_call_count.len()); + iseq_call_count[idx] += 1; +} + +/// YJIT exit counts for each instruction type. +/// Note that `VM_INSTRUCTION_SIZE` is an upper bound and the actual number +/// of VM opcodes may be different in the build. See [`rb_vm_instruction_size()`] +const VM_INSTRUCTION_SIZE_USIZE: usize = VM_INSTRUCTION_SIZE as usize; static mut EXIT_OP_COUNT: [u64; VM_INSTRUCTION_SIZE_USIZE] = [0; VM_INSTRUCTION_SIZE_USIZE]; /// Global state needed for collecting backtraces of exits @@ -24,7 +103,9 @@ pub struct YjitExitLocations { raw_samples: Vec<VALUE>, /// Vec to hold line_samples which represent line numbers of /// the iseq caller. - line_samples: Vec<i32> + line_samples: Vec<i32>, + /// Number of samples skipped when sampling + skipped_samples: usize } /// Private singleton instance of yjit exit locations @@ -33,19 +114,15 @@ static mut YJIT_EXIT_LOCATIONS: Option<YjitExitLocations> = None; impl YjitExitLocations { /// Initialize the yjit exit locations pub fn init() { - // Return if the stats feature is disabled - if !cfg!(feature = "stats") { - return; - } - // Return if --yjit-trace-exits isn't enabled - if !get_option!(gen_trace_exits) { + if get_option!(trace_exits).is_none() { return; } let yjit_exit_locations = YjitExitLocations { raw_samples: Vec::new(), - line_samples: Vec::new() + line_samples: Vec::new(), + skipped_samples: 0 }; // Initialize the yjit exit locations instance @@ -69,6 +146,11 @@ impl YjitExitLocations { &mut YjitExitLocations::get_instance().line_samples } + /// Get the number of samples skipped + pub fn get_skipped_samples() -> &'static mut usize { + &mut YjitExitLocations::get_instance().skipped_samples + } + /// Mark the data stored in YjitExitLocations::get_raw_samples that needs to be used by /// rb_yjit_add_frame. YjitExitLocations::get_raw_samples are an array of /// VALUE pointers, exit instruction, and number of times we've seen this stack row @@ -81,13 +163,8 @@ impl YjitExitLocations { return; } - // Return if the stats feature is disabled - if !cfg!(feature = "stats") { - return; - } - // Return if --yjit-trace-exits isn't enabled - if !get_option!(gen_trace_exits) { + if get_option!(trace_exits).is_none() { return; } @@ -109,7 +186,7 @@ impl YjitExitLocations { // Increase index for exit instruction. 
idx += 1; - // Increase index for bookeeping value (number of times we've seen this + // Increase index for bookkeeping value (number of times we've seen this // row in a stack). idx += 1; } @@ -123,6 +200,28 @@ macro_rules! make_counters { #[derive(Default, Debug)] pub struct Counters { $(pub $counter_name: u64),+ } + /// Enum to represent a counter + #[allow(non_camel_case_types)] + #[derive(Clone, Copy, PartialEq, Eq, Debug)] + pub enum Counter { $($counter_name),+ } + + impl Counter { + /// Map a counter name string to a counter enum + pub fn get(name: &str) -> Option<Counter> { + match name { + $( stringify!($counter_name) => { Some(Counter::$counter_name) } ),+ + _ => None, + } + } + + /// Get a counter name string + pub fn get_name(&self) -> String { + match self { + $( Counter::$counter_name => stringify!($counter_name).to_string() ),+ + } + } + } + /// Global counters instance, initialized to zero pub static mut COUNTERS: Counters = Counters { $($counter_name: 0),+ }; @@ -130,7 +229,7 @@ macro_rules! make_counters { const COUNTER_NAMES: &'static [&'static str] = &[ $(stringify!($counter_name)),+ ]; /// Map a counter name string to a counter pointer - fn get_counter_ptr(name: &str) -> *mut u64 { + pub fn get_counter_ptr(name: &str) -> *mut u64 { match name { $( stringify!($counter_name) => { ptr_to_counter!($counter_name) } ),+ _ => panic!() @@ -139,6 +238,66 @@ macro_rules! make_counters { } } +/// The list of counters that are available without --yjit-stats. +/// They are incremented only by `incr_counter!` and don't use `gen_counter_incr`. +pub const DEFAULT_COUNTERS: &'static [Counter] = &[ + Counter::code_gc_count, + Counter::compiled_iseq_entry, + Counter::cold_iseq_entry, + Counter::compiled_iseq_count, + Counter::compiled_blockid_count, + Counter::compiled_block_count, + Counter::deleted_defer_block_count, + Counter::compiled_branch_count, + Counter::compile_time_ns, + Counter::compilation_failure, + Counter::max_inline_versions, + Counter::inline_block_count, + Counter::num_contexts_encoded, + Counter::context_cache_hits, + + Counter::invalidation_count, + Counter::invalidate_method_lookup, + Counter::invalidate_bop_redefined, + Counter::invalidate_ractor_spawn, + Counter::invalidate_constant_state_bump, + Counter::invalidate_constant_ic_fill, + Counter::invalidate_no_singleton_class, + Counter::invalidate_ep_escape, + Counter::invalidate_everything, +]; + +/// Macro to increase a counter by name and count +macro_rules! incr_counter_by { + // Unsafe is ok here because options are initialized + // once before any Ruby code executes + ($counter_name:ident, $count:expr) => { + #[allow(unused_unsafe)] + { + unsafe { $crate::stats::COUNTERS.$counter_name += $count as u64 } + } + }; +} +pub(crate) use incr_counter_by; + +/// Macro to increase a counter if the given value is larger +macro_rules! incr_counter_to { + // Unsafe is ok here because options are initialized + // once before any Ruby code executes + ($counter_name:ident, $count:expr) => { + #[allow(unused_unsafe)] + { + unsafe { + $crate::stats::COUNTERS.$counter_name = u64::max( + $crate::stats::COUNTERS.$counter_name, + $count as u64, + ) + } + } + }; +} +pub(crate) use incr_counter_to; + /// Macro to increment a counter by name macro_rules! incr_counter { // Unsafe is ok here because options are initialized @@ -161,136 +320,235 @@ macro_rules! ptr_to_counter { } }; } -pub(crate) use ptr_to_counter; // Declare all the counters we track make_counters! 
{ - exec_instruction, + yjit_insns_count, - send_keywords, - send_kw_splat, - send_args_splat_super, - send_iseq_zsuper, - send_block_arg, + // Method calls that fallback to dynamic dispatch + send_singleton_class, + send_forwarding, send_ivar_set_method, send_zsuper_method, send_undef_method, - send_optimized_method, - send_optimized_method_call, send_optimized_method_block_call, send_call_block, send_call_kwarg, send_call_multi_ractor, + send_cme_not_found, + send_megamorphic, send_missing_method, send_refined_method, - send_cfunc_ruby_array_varg, + send_private_not_fcall, + send_cfunc_kw_splat_non_nil, + send_cfunc_splat_neg2, send_cfunc_argc_mismatch, + send_cfunc_block_arg, send_cfunc_toomany_args, send_cfunc_tracing, - send_cfunc_kwargs, send_cfunc_splat_with_kw, + send_cfunc_splat_varg_ruby2_keywords, send_attrset_kwargs, + send_attrset_block_arg, send_iseq_tailcall, send_iseq_arity_error, - send_iseq_only_keywords, - send_iseq_kwargs_req_and_opt_missing, + send_iseq_block_arg_type, + send_iseq_clobbering_block_arg, + send_iseq_block_arg_gc_unsafe, + send_iseq_complex_discard_extras, + send_iseq_leaf_builtin_block_arg_block_param, + send_iseq_kw_splat_non_nil, send_iseq_kwargs_mismatch, - send_iseq_has_rest, send_iseq_has_post, - send_iseq_has_kwrest, send_iseq_has_no_kw, send_iseq_accepts_no_kwarg, send_iseq_materialized_block, - send_iseq_splat_with_opt, + send_iseq_send_forwarding, + send_iseq_splat_not_array, send_iseq_splat_with_kw, send_iseq_missing_optional_kw, send_iseq_too_many_kwargs, send_not_implemented_method, send_getter_arity, - send_se_cf_overflow, - send_se_protected_check_failed, - send_splatarray_length_not_equal, - send_splatarray_last_ruby_2_keywords, - send_splat_not_array, - send_args_splat_non_iseq, - send_args_splat_ivar, + send_getter_block_arg, send_args_splat_attrset, send_args_splat_bmethod, send_args_splat_aref, send_args_splat_aset, - send_args_splat_optimized, - send_args_splat_cfunc_var_args, - send_args_splat_cfunc_zuper, - send_args_splat_cfunc_ruby2_keywords, - send_iseq_ruby2_keywords, - send_send_not_imm, + send_args_splat_opt_call, + send_iseq_splat_arity_error, + send_splat_too_long, send_send_wrong_args, send_send_null_mid, send_send_null_cme, send_send_nested, - send_send_chain, - send_send_chain_string, - send_send_chain_not_string, - send_send_chain_not_sym, - send_send_chain_not_string_or_sym, - send_send_getter, - send_send_builtin, - + send_send_attr_reader, + send_send_attr_writer, + send_iseq_has_rest_and_captured, + send_iseq_has_kwrest_and_captured, + send_iseq_has_rest_and_kw_supplied, + send_iseq_has_rest_opt_and_block, send_bmethod_ractor, send_bmethod_block_arg, - - traced_cfunc_return, - - invokesuper_me_changed, - invokesuper_block, - + send_optimized_block_arg, + send_pred_not_fixnum, + send_pred_underflow, + send_str_dup_exivar, + + invokesuper_defined_class_mismatch, + invokesuper_forwarding, + invokesuper_kw_splat, + invokesuper_kwarg, + invokesuper_megamorphic, + invokesuper_no_cme, + invokesuper_no_me, + invokesuper_not_iseq_or_cfunc, + invokesuper_refinement, + invokesuper_singleton_class, + + invokeblock_megamorphic, invokeblock_none, - invokeblock_iseq_arg0_splat, - invokeblock_iseq_block_changed, - invokeblock_iseq_tag_changed, - invokeblock_ifunc, + invokeblock_iseq_arg0_optional, + invokeblock_iseq_arg0_args_splat, + invokeblock_iseq_arg0_not_array, + invokeblock_iseq_arg0_wrong_len, + invokeblock_iseq_not_inlined, + invokeblock_ifunc_args_splat, + invokeblock_ifunc_kw_splat, invokeblock_proc, invokeblock_symbol, + 
// Method calls that exit to the interpreter + guard_send_block_arg_type, + guard_send_getter_splat_non_empty, + guard_send_klass_megamorphic, + guard_send_se_cf_overflow, + guard_send_se_protected_check_failed, + guard_send_splatarray_length_not_equal, + guard_send_splatarray_last_ruby2_keywords, + guard_send_splat_not_array, + guard_send_send_name_chain, + guard_send_iseq_has_rest_and_splat_too_few, + guard_send_is_a_class_mismatch, + guard_send_instance_of_class_mismatch, + guard_send_interrupted, + guard_send_not_fixnums, + guard_send_not_fixnum, + guard_send_not_fixnum_or_flonum, + guard_send_not_string, + guard_send_respond_to_mid_mismatch, + guard_send_str_aref_not_fixnum, + + guard_send_cfunc_bad_splat_vargs, + guard_send_cfunc_block_not_nil, + + guard_invokesuper_me_changed, + + guard_invokeblock_tag_changed, + guard_invokeblock_iseq_block_changed, + + traced_cfunc_return, + leave_se_interrupt, leave_interp_return, - leave_start_pc_non_zero, - getivar_se_self_not_heap, - getivar_idx_out_of_range, getivar_megamorphic, + getivar_not_heap, - setivar_se_self_not_heap, - setivar_idx_out_of_range, - setivar_val_heapobject, - setivar_name_not_mapped, - setivar_not_object, + setivar_not_heap, setivar_frozen, setivar_megamorphic, - oaref_argc_not_one, - oaref_arg_not_fixnum, + definedivar_not_heap, + definedivar_megamorphic, + + setlocal_wb_required, + + invokebuiltin_too_many_args, + + opt_plus_overflow, + opt_minus_overflow, + opt_mult_overflow, + + opt_succ_not_fixnum, + opt_succ_overflow, - opt_getinlinecache_miss, + opt_mod_zero, + opt_div_zero, + + lshift_amount_changed, + lshift_overflow, + + rshift_amount_changed, + + opt_aref_argc_not_one, + opt_aref_arg_not_fixnum, + opt_aref_not_array, + opt_aref_not_hash, + + opt_aset_not_array, + opt_aset_not_fixnum, + opt_aset_not_hash, + opt_aset_frozen, + + opt_case_dispatch_megamorphic, + + opt_getconstant_path_ic_miss, + opt_getconstant_path_multi_ractor, expandarray_splat, expandarray_postarg, expandarray_not_array, - expandarray_rhs_too_small, + expandarray_to_ary, + expandarray_method_missing, + expandarray_chain_max_depth, + + // getblockparam + gbp_wb_required, + // getblockparamproxy + gbpp_unsupported_type, gbpp_block_param_modified, + gbpp_block_handler_not_none, gbpp_block_handler_not_iseq, + gbpp_block_handler_not_proc, + + branchif_interrupted, + branchunless_interrupted, + branchnil_interrupted, + jump_interrupted, + + objtostring_not_string, + + getbyte_idx_not_fixnum, + getbyte_idx_negative, + getbyte_idx_out_of_bounds, + + splatkw_not_hash, + splatkw_not_nil, binding_allocations, binding_set, - vm_insns_count, + compiled_iseq_entry, + cold_iseq_entry, compiled_iseq_count, + compiled_blockid_count, compiled_block_count, compiled_branch_count, + compile_time_ns, compilation_failure, + abandoned_block_count, block_next_count, defer_count, + defer_empty_count, + deleted_defer_block_count, + branch_insn_count, + branch_known_count, + max_inline_versions, + inline_block_count, + num_contexts_encoded, + freed_iseq_count, exit_from_branch_stub, @@ -301,17 +559,52 @@ make_counters! { invalidate_ractor_spawn, invalidate_constant_state_bump, invalidate_constant_ic_fill, - - constant_state_bumps, + invalidate_no_singleton_class, + invalidate_ep_escape, + invalidate_everything, // Currently, it's out of the ordinary (might be impossible) for YJIT to leave gaps in // executable memory, so this should be 0. 
exec_mem_non_bump_alloc, + code_gc_count, + num_gc_obj_refs, - x86_call_rel32, - x86_call_reg, + num_send, + num_send_known_class, + num_send_polymorphic, + num_send_x86_rel32, + num_send_x86_reg, + num_send_dynamic, + num_send_cfunc, + num_send_cfunc_inline, + num_send_iseq, + num_send_iseq_leaf, + num_send_iseq_inline, + + num_getivar_megamorphic, + num_setivar_megamorphic, + num_opt_case_dispatch_megamorphic, + + num_throw, + num_throw_break, + num_throw_retry, + num_throw_return, + + num_lazy_frame_check, + num_lazy_frame_push, + lazy_frame_count, + lazy_frame_failure, + + iseq_stack_too_large, + iseq_too_long, + + temp_reg_opnd, + temp_mem_opnd, + temp_spill, + + context_cache_hits, } //=========================================================================== @@ -328,21 +621,30 @@ pub extern "C" fn rb_yjit_stats_enabled_p(_ec: EcPtr, _ruby_self: VALUE) -> VALU } } +/// Primitive called in yjit.rb +/// Check if stats generation should print at exit +#[no_mangle] +pub extern "C" fn rb_yjit_print_stats_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { + if yjit_enabled_p() && get_option!(print_stats) { + return Qtrue; + } else { + return Qfalse; + } +} + /// Primitive called in yjit.rb. /// Export all YJIT statistics as a Ruby hash. #[no_mangle] -pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { - with_vm_lock(src_loc!(), || rb_yjit_gen_stats_dict()) +pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE, key: VALUE) -> VALUE { + with_vm_lock(src_loc!(), || rb_yjit_gen_stats_dict(key)) } /// Primitive called in yjit.rb /// -/// Check if trace_exits generation is enabled. Requires the stats feature -/// to be enabled. +/// Check if trace_exits generation is enabled. #[no_mangle] pub extern "C" fn rb_yjit_trace_exit_locations_enabled_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { - #[cfg(feature = "stats")] - if get_option!(gen_trace_exits) { + if get_option!(trace_exits).is_some() { return Qtrue; } @@ -358,17 +660,12 @@ pub extern "C" fn rb_yjit_get_exit_locations(_ec: EcPtr, _ruby_self: VALUE) -> V return Qnil; } - // Return if the stats feature is disabled - if !cfg!(feature = "stats") { - return Qnil; - } - // Return if --yjit-trace-exits isn't enabled - if !get_option!(gen_trace_exits) { + if get_option!(trace_exits).is_none() { return Qnil; } - // If the stats feature is enabled, pass yjit_raw_samples and yjit_line_samples + // Pass yjit_raw_samples and yjit_line_samples // to the C function called rb_yjit_exit_locations_dict for parsing. let yjit_raw_samples = YjitExitLocations::get_raw_samples(); let yjit_line_samples = YjitExitLocations::get_line_samples(); @@ -386,102 +683,216 @@ pub extern "C" fn rb_yjit_get_exit_locations(_ec: EcPtr, _ruby_self: VALUE) -> V } } +/// Increment a counter by name from the CRuby side +/// Warning: this is not fast because it requires a hash lookup, so don't use in tight loops +#[no_mangle] +pub extern "C" fn rb_yjit_incr_counter(counter_name: *const std::os::raw::c_char) { + use std::ffi::CStr; + let counter_name = unsafe { CStr::from_ptr(counter_name).to_str().unwrap() }; + let counter_ptr = get_counter_ptr(counter_name); + unsafe { *counter_ptr += 1 }; +} + /// Export all YJIT statistics as a Ruby hash. -fn rb_yjit_gen_stats_dict() -> VALUE { +fn rb_yjit_gen_stats_dict(key: VALUE) -> VALUE { // If YJIT is not enabled, return Qnil if !yjit_enabled_p() { return Qnil; } - macro_rules! 
hash_aset_usize { - ($hash:ident, $counter_name:expr, $value:expr) => { - let key = rust_str_to_sym($counter_name); - let value = VALUE::fixnum_from_usize($value); - rb_hash_aset($hash, key, value); + let hash = if key == Qnil { + unsafe { rb_hash_new() } + } else { + Qnil + }; + + macro_rules! set_stat { + ($hash:ident, $name:expr, $value:expr) => { + let rb_key = rust_str_to_sym($name); + if key == rb_key { + return $value; + } else if hash != Qnil { + rb_hash_aset($hash, rb_key, $value); + } } } - let hash = unsafe { rb_hash_new() }; + macro_rules! set_stat_usize { + ($hash:ident, $name:expr, $value:expr) => { + set_stat!($hash, $name, VALUE::fixnum_from_usize($value)); + } + } + + macro_rules! set_stat_double { + ($hash:ident, $name:expr, $value:expr) => { + set_stat!($hash, $name, rb_float_new($value)); + } + } - // CodeBlock stats unsafe { // Get the inline and outlined code blocks let cb = CodegenGlobals::get_inline_cb(); let ocb = CodegenGlobals::get_outlined_cb(); // Inline code size - hash_aset_usize!(hash, "inline_code_size", cb.code_size()); + set_stat_usize!(hash, "inline_code_size", cb.code_size()); // Outlined code size - hash_aset_usize!(hash, "outlined_code_size", ocb.unwrap().code_size()); + set_stat_usize!(hash, "outlined_code_size", ocb.unwrap().code_size()); // GCed pages let freed_page_count = cb.num_freed_pages(); - hash_aset_usize!(hash, "freed_page_count", freed_page_count); + set_stat_usize!(hash, "freed_page_count", freed_page_count); // GCed code size - hash_aset_usize!(hash, "freed_code_size", freed_page_count * cb.page_size()); + set_stat_usize!(hash, "freed_code_size", freed_page_count * cb.page_size()); // Live pages - hash_aset_usize!(hash, "live_page_count", cb.num_mapped_pages() - freed_page_count); - - // Code GC count - hash_aset_usize!(hash, "code_gc_count", CodegenGlobals::get_code_gc_count()); + set_stat_usize!(hash, "live_page_count", cb.num_mapped_pages() - freed_page_count); // Size of memory region allocated for JIT code - hash_aset_usize!(hash, "code_region_size", cb.mapped_region_size()); + set_stat_usize!(hash, "code_region_size", cb.mapped_region_size()); // Rust global allocations in bytes - #[cfg(feature="stats")] - hash_aset_usize!(hash, "yjit_alloc_size", global_allocation_size()); + set_stat_usize!(hash, "yjit_alloc_size", yjit_alloc_size()); + + // How many bytes we are using to store context data + let context_data = CodegenGlobals::get_context_data(); + set_stat_usize!(hash, "context_data_bytes", context_data.num_bytes()); + set_stat_usize!(hash, "context_cache_bytes", crate::core::CTX_ENCODE_CACHE_BYTES + crate::core::CTX_DECODE_CACHE_BYTES); + + // VM instructions count + if rb_vm_insn_count > 0 { + set_stat_usize!(hash, "vm_insns_count", rb_vm_insn_count as usize); + } + + set_stat_usize!(hash, "live_iseq_count", rb_yjit_live_iseq_count as usize); + set_stat_usize!(hash, "iseq_alloc_count", rb_yjit_iseq_alloc_count as usize); + + set_stat!(hash, "object_shape_count", rb_object_shape_count()); + + // Time since YJIT init in nanoseconds + let time_nanos = Instant::now().duration_since(YJIT_INIT_TIME.unwrap()).as_nanos(); + set_stat_usize!(hash, "yjit_active_ns", time_nanos as usize); } - // If we're not generating stats, the hash is done + // If we're not generating stats, put only default counters if !get_option!(gen_stats) { + for counter in DEFAULT_COUNTERS { + // Get the counter value + let counter_ptr = get_counter_ptr(&counter.get_name()); + let counter_val = unsafe { *counter_ptr }; + + // Put counter into hash + let key = 
&counter.get_name(); + let value = VALUE::fixnum_from_usize(counter_val as usize); + unsafe { set_stat!(hash, key, value); } + } + return hash; } - // If the stats feature is enabled - unsafe { // Indicate that the complete set of stats is available - rb_hash_aset(hash, rust_str_to_sym("all_stats"), Qtrue); + set_stat!(hash, "all_stats", Qtrue); // For each counter we track for counter_name in COUNTER_NAMES { // Get the counter value let counter_ptr = get_counter_ptr(counter_name); let counter_val = *counter_ptr; - - #[cfg(not(feature = "stats"))] - if counter_name == &"vm_insns_count" { - // If the stats feature is disabled, we don't have vm_insns_count - // so we are going to exlcude the key - continue; - } - - // Put counter into hash - let key = rust_str_to_sym(counter_name); - let value = VALUE::fixnum_from_usize(counter_val as usize); - rb_hash_aset(hash, key, value); + set_stat_usize!(hash, counter_name, counter_val as usize); } + let mut side_exits = 0; + // For each entry in exit_op_count, add a stats entry with key "exit_INSTRUCTION_NAME" // and the value is the count of side exits for that instruction. - for op_idx in 0..VM_INSTRUCTION_SIZE_USIZE { + use crate::utils::IntoUsize; + for op_idx in 0..rb_vm_instruction_size().as_usize() { let op_name = insn_name(op_idx); let key_string = "exit_".to_owned() + &op_name; - let key = rust_str_to_sym(&key_string); - let value = VALUE::fixnum_from_usize(EXIT_OP_COUNT[op_idx] as usize); - rb_hash_aset(hash, key, value); + let count = EXIT_OP_COUNT[op_idx]; + side_exits += count; + set_stat_usize!(hash, &key_string, count as usize); + } + + set_stat_usize!(hash, "side_exit_count", side_exits as usize); + + let total_exits = side_exits + *get_counter_ptr(&Counter::leave_interp_return.get_name()); + set_stat_usize!(hash, "total_exit_count", total_exits as usize); + + // Number of instructions that finish executing in YJIT. + // See :count-placement: about the subtraction. + let retired_in_yjit = *get_counter_ptr(&Counter::yjit_insns_count.get_name()) - side_exits; + + // Average length of instruction sequences executed by YJIT + let avg_len_in_yjit: f64 = if total_exits > 0 { + retired_in_yjit as f64 / total_exits as f64 + } else { + 0_f64 + }; + set_stat_double!(hash, "avg_len_in_yjit", avg_len_in_yjit); + + // Proportion of instructions that retire in YJIT + if rb_vm_insn_count > 0 { + let total_insns_count = retired_in_yjit + rb_vm_insn_count; + set_stat_usize!(hash, "total_insns_count", total_insns_count as usize); + + let ratio_in_yjit: f64 = 100.0 * retired_in_yjit as f64 / total_insns_count as f64; + set_stat_double!(hash, "ratio_in_yjit", ratio_in_yjit); } + + // Set method call counts in a Ruby dict + fn set_call_counts( + calls_hash: VALUE, + method_name_to_idx: &mut Option<HashMap<String, usize>>, + method_call_count: &mut Option<Vec<u64>>, + ) { + if let (Some(name_to_idx), Some(call_counts)) = (method_name_to_idx, method_call_count) { + // Create a list of (name, call_count) pairs + let mut pairs = Vec::new(); + for (name, idx) in name_to_idx { + let count = call_counts[*idx]; + pairs.push((name, count)); + } + + // Sort the vectors by decreasing call counts + pairs.sort_by_key(|e| -(e.1 as i64)); + + // Cap the number of counts reported to avoid + // bloating log files, etc. 
+ pairs.truncate(20); + + // Add the pairs to the dict + for (name, call_count) in pairs { + let key = rust_str_to_sym(name); + let value = VALUE::fixnum_from_usize(call_count as usize); + unsafe { rb_hash_aset(calls_hash, key, value); } + } + } + } + + // Create a hash for the cfunc call counts + set_stat!(hash, "cfunc_calls", { + let cfunc_calls = rb_hash_new(); + set_call_counts(cfunc_calls, &mut *addr_of_mut!(CFUNC_NAME_TO_IDX), &mut *addr_of_mut!(CFUNC_CALL_COUNT)); + cfunc_calls + }); + + // Create a hash for the ISEQ call counts + set_stat!(hash, "iseq_calls", { + let iseq_calls = rb_hash_new(); + set_call_counts(iseq_calls, &mut *addr_of_mut!(ISEQ_NAME_TO_IDX), &mut *addr_of_mut!(ISEQ_CALL_COUNT)); + iseq_calls + }); } hash } /// Record the backtrace when a YJIT exit occurs. This functionality requires -/// that the stats feature is enabled as well as the --yjit-trace-exits option. +/// the --yjit-trace-exits option. /// /// This function will fill two Vec's in YjitExitLocations to record the raw samples /// and line samples. Their length should be the same, however the data stored in @@ -494,20 +905,26 @@ pub extern "C" fn rb_yjit_record_exit_stack(exit_pc: *const VALUE) return; } - // Return if the stats feature is disabled - if !cfg!(feature = "stats") { + // Return if --yjit-trace-exits isn't enabled + if get_option!(trace_exits).is_none() { return; } - // Return if --yjit-trace-exits isn't enabled - if !get_option!(gen_trace_exits) { - return; + if get_option!(trace_exits_sample_rate) > 0 { + if get_option!(trace_exits_sample_rate) <= *YjitExitLocations::get_skipped_samples() { + YjitExitLocations::get_instance().skipped_samples = 0; + } else { + YjitExitLocations::get_instance().skipped_samples += 1; + return; + } } // rb_vm_insn_addr2opcode won't work in cargo test --all-features // because it's a C function. Without insn call, this function is useless // so wrap the whole thing in a not test check. - if cfg!(not(test)) { + let _ = exit_pc; + #[cfg(not(test))] + { // Get the opcode from the encoded insn handler at this PC let insn = unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) }; @@ -540,7 +957,7 @@ pub extern "C" fn rb_yjit_record_exit_stack(exit_pc: *const VALUE) let mut prev_frame_idx = 0; let mut seen_already = true; - // If the previous stack lenght and current stack length are equal, + // If the previous stack length and current stack length are equal, // loop and compare the current frame to the previous frame. If they are // not equal, set seen_already to false and break out of the loop. if prev_stack_len == stack_length as i64 { @@ -591,10 +1008,8 @@ pub extern "C" fn rb_yjit_record_exit_stack(exit_pc: *const VALUE) // Push the insn value into the yjit_raw_samples Vec. yjit_raw_samples.push(VALUE(insn as usize)); - // Push the current line onto the yjit_line_samples Vec. This - // points to the line in insns.def. - let line = yjit_line_samples.len() - 1; - yjit_line_samples.push(line as i32); + // We don't know the line + yjit_line_samples.push(0); // Push number of times seen onto the stack, which is 1 // because it's the first time we've seen it. 
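A note on the per-method call-count plumbing added in the stats.rs hunks above (`get_method_idx`, `set_call_counts`): it interns each method name into a `HashMap<String, usize>` and keeps the actual counts in a parallel `Vec<u64>`, then reports only the most-called entries. Below is a minimal, self-contained sketch of that pattern; the `intern`/`top_n` names and the `main` driver are invented for illustration and are not part of the YJIT sources.

```rust
use std::collections::HashMap;

/// Intern `name`, returning a stable index into a parallel counts vector.
fn intern(name: &str, name_to_idx: &mut HashMap<String, usize>, counts: &mut Vec<u64>) -> usize {
    if let Some(&idx) = name_to_idx.get(name) {
        return idx;
    }
    let idx = name_to_idx.len();
    name_to_idx.insert(name.to_string(), idx);
    // Grow the counts vector so the new index is valid.
    if idx >= counts.len() {
        counts.resize(idx + 1, 0);
    }
    idx
}

/// Return up to `n` (name, count) pairs, most-called first.
fn top_n(name_to_idx: &HashMap<String, usize>, counts: &[u64], n: usize) -> Vec<(String, u64)> {
    let mut pairs: Vec<(String, u64)> = name_to_idx
        .iter()
        .map(|(name, &idx)| (name.clone(), counts[idx]))
        .collect();
    pairs.sort_by(|a, b| b.1.cmp(&a.1));
    pairs.truncate(n);
    pairs
}

fn main() {
    let mut names = HashMap::new();
    let mut counts = Vec::new();
    for call in ["Array#each", "Integer#+", "Array#each"] {
        let idx = intern(call, &mut names, &mut counts);
        counts[idx] += 1;
    }
    println!("{:?}", top_n(&names, &counts, 20));
}
```

Capping the report (the patch uses `pairs.truncate(20)`) keeps `--yjit-stats` output and log files small even for applications with many distinct call sites.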
@@ -614,12 +1029,6 @@ pub extern "C" fn rb_yjit_reset_stats_bang(_ec: EcPtr, _ruby_self: VALUE) -> VAL return Qnil; } -/// Increment the number of instructions executed by the interpreter -#[no_mangle] -pub extern "C" fn rb_yjit_collect_vm_usage_insn() { - incr_counter!(vm_insns_count); -} - #[no_mangle] pub extern "C" fn rb_yjit_collect_binding_alloc() { incr_counter!(binding_allocations); @@ -645,9 +1054,11 @@ pub extern "C" fn rb_yjit_count_side_exit_op(exit_pc: *const VALUE) -> *const VA return exit_pc; } -// Get the size of global allocations in Rust. -#[cfg(feature="stats")] -fn global_allocation_size() -> usize { - let stats = GLOBAL_ALLOCATOR.stats(); - stats.bytes_allocated.saturating_sub(stats.bytes_deallocated) +/// Measure the time taken by func() and add that to yjit_compile_time. +pub fn with_compile_time<F, R>(func: F) -> R where F: FnOnce() -> R { + let start = Instant::now(); + let ret = func(); + let nanos = Instant::now().duration_since(start).as_nanos(); + incr_counter_by!(compile_time_ns, nanos); + ret } diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs index f66000381e..251628fabf 100644 --- a/yjit/src/utils.rs +++ b/yjit/src/utils.rs @@ -3,9 +3,10 @@ use crate::backend::ir::*; use crate::cruby::*; use std::slice; +use std::os::raw::c_int; /// Trait for casting to [usize] that allows you to say `.as_usize()`. -/// Implementation conditional on the the cast preserving the numeric value on +/// Implementation conditional on the cast preserving the numeric value on /// all inputs and being inexpensive. /// /// [usize] is only guaranteed to be more than 16-bit wide, so we can't use @@ -51,6 +52,20 @@ impl IntoUsize for u8 { } } +/// The `Into<u64>` Rust does not provide. +/// Convert to u64 with assurance that the value is preserved. +/// Currently, `usize::BITS == 64` holds for all platforms we support. +pub(crate) trait IntoU64 { + fn as_u64(self) -> u64; +} + +#[cfg(target_pointer_width = "64")] +impl IntoU64 for usize { + fn as_u64(self) -> u64 { + self as u64 + } +} + /// Compute an offset in bytes of a given struct field #[allow(unused)] macro_rules! offset_of { @@ -73,20 +88,17 @@ pub(crate) use offset_of; // Convert a CRuby UTF-8-encoded RSTRING into a Rust string. // This should work fine on ASCII strings and anything else // that is considered legal UTF-8, including embedded nulls. -fn ruby_str_to_rust(v: VALUE) -> String { +pub fn ruby_str_to_rust(v: VALUE) -> String { let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8; let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap(); let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) }; - match String::from_utf8(str_slice.to_vec()) { - Ok(utf8) => utf8, - Err(_) => String::new(), - } + String::from_utf8(str_slice.to_vec()).unwrap_or_default() } // Location is the file defining the method, colon, method name. // Filenames are sometimes internal strings supplied to eval, // so be careful with them. -pub fn iseq_get_location(iseq: IseqPtr, pos: u32) -> String { +pub fn iseq_get_location(iseq: IseqPtr, pos: u16) -> String { let iseq_label = unsafe { rb_iseq_label(iseq) }; let iseq_path = unsafe { rb_iseq_path(iseq) }; let iseq_lineno = unsafe { rb_iseq_line_no(iseq, pos as usize) }; @@ -148,8 +160,6 @@ pub fn print_int(asm: &mut Assembler, opnd: Opnd) { } } - asm.cpush_all(); - let argument = match opnd { Opnd::Mem(_) | Opnd::Reg(_) | Opnd::InsnOut { .. 
} => { // Sign-extend the value if necessary @@ -164,7 +174,6 @@ pub fn print_int(asm: &mut Assembler, opnd: Opnd) { }; asm.ccall(print_int_fn as *const u8, vec![argument]); - asm.cpop_all(); } /// Generate code to print a pointer @@ -177,9 +186,7 @@ pub fn print_ptr(asm: &mut Assembler, opnd: Opnd) { assert!(opnd.rm_num_bits() == 64); - asm.cpush_all(); asm.ccall(print_ptr_fn as *const u8, vec![opnd]); - asm.cpop_all(); } /// Generate code to print a value @@ -192,9 +199,7 @@ pub fn print_value(asm: &mut Assembler, opnd: Opnd) { assert!(matches!(opnd, Opnd::Value(_))); - asm.cpush_all(); asm.ccall(print_value_fn as *const u8, vec![opnd]); - asm.cpop_all(); } /// Generate code to print constant string to stdout @@ -209,7 +214,6 @@ pub fn print_str(asm: &mut Assembler, str: &str) { } } - asm.cpush_all(); let string_data = asm.new_label("string_data"); let after_string = asm.new_label("after_string"); @@ -219,10 +223,16 @@ pub fn print_str(asm: &mut Assembler, str: &str) { asm.bake_string(str); asm.write_label(after_string); - let opnd = asm.lea_label(string_data); + let opnd = asm.lea_jump_target(string_data); asm.ccall(print_str_cfun as *const u8, vec![opnd, Opnd::UImm(str.len() as u64)]); +} - asm.cpop_all(); +pub fn stdout_supports_colors() -> bool { + // TODO(max): Use std::io::IsTerminal after upgrading Rust to 1.70 + extern "C" { fn isatty(fd: c_int) -> c_int; } + let stdout = 1; + let is_terminal = unsafe { isatty(stdout) } == 1; + is_terminal } #[cfg(test)] @@ -259,19 +269,19 @@ mod tests { #[test] fn test_print_int() { - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); let mut cb = CodeBlock::new_dummy(1024); print_int(&mut asm, Opnd::Imm(42)); - asm.compile(&mut cb); + asm.compile(&mut cb, None).unwrap(); } #[test] fn test_print_str() { - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); let mut cb = CodeBlock::new_dummy(1024); print_str(&mut asm, "Hello, world!"); - asm.compile(&mut cb); + asm.compile(&mut cb, None).unwrap(); } } diff --git a/yjit/src/virtualmem.rs b/yjit/src/virtualmem.rs index 1a5b2b1908..9126cf300e 100644 --- a/yjit/src/virtualmem.rs +++ b/yjit/src/virtualmem.rs @@ -3,9 +3,12 @@ // usize->pointer casts is viable. It seems like a lot of work for us to participate for not much // benefit. -use std::ptr::NonNull; +use std::{cell::RefCell, ptr::NonNull}; -use crate::{utils::IntoUsize, backend::ir::Target}; +use crate::{backend::ir::Target, stats::yjit_alloc_size, utils::IntoUsize}; + +#[cfg(test)] +use crate::options::get_option; #[cfg(not(test))] pub type VirtualMem = VirtualMemory<sys::SystemAllocator>; @@ -26,15 +29,24 @@ pub struct VirtualMemory<A: Allocator> { /// Location of the virtual memory region. region_start: NonNull<u8>, - /// Size of the region in bytes. + /// Size of this virtual memory region in bytes. region_size_bytes: usize, + /// mapped_region_bytes + yjit_alloc_size may not increase beyond this limit. + memory_limit_bytes: usize, + /// Number of bytes per "page", memory protection permission can only be controlled at this /// granularity. page_size_bytes: usize, + /// Mutable parts. + mutable: RefCell<VirtualMemoryMut<A>>, +} + +/// Mutable parts of [`VirtualMemory`]. +pub struct VirtualMemoryMut<A: Allocator> { /// Number of bytes that have we have allocated physical memory for starting at - /// [Self::region_start]. + /// [VirtualMemory::region_start]. mapped_region_bytes: usize, /// Keep track of the address of the last written to page. 
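The virtualmem.rs hunks that follow move every field that changes at runtime into a `RefCell`-wrapped `VirtualMemoryMut`, so methods such as `write_byte`, `mark_all_executable`, and `free_bytes` can take `&self` instead of `&mut self`. As a rough sketch of that interior-mutability split, assuming made-up `Region`/`RegionMut` types rather than the real YJIT ones:

```rust
use std::cell::RefCell;

/// Immutable configuration lives directly in the struct; state that changes
/// at runtime is grouped behind a RefCell so methods can take `&self`.
struct Region {
    size_bytes: usize,            // fixed after construction
    mutable: RefCell<RegionMut>,  // interior-mutable parts
}

struct RegionMut {
    mapped_bytes: usize,
}

impl Region {
    fn new(size_bytes: usize) -> Self {
        Region { size_bytes, mutable: RefCell::new(RegionMut { mapped_bytes: 0 }) }
    }

    /// Grow the mapped prefix of the region; note the `&self` receiver.
    fn map_more(&self, bytes: usize) -> Result<(), ()> {
        let mut m = self.mutable.borrow_mut();
        if m.mapped_bytes + bytes > self.size_bytes {
            return Err(());
        }
        m.mapped_bytes += bytes;
        Ok(())
    }

    fn mapped_bytes(&self) -> usize {
        self.mutable.borrow().mapped_bytes
    }
}

fn main() {
    let region = Region::new(4096);
    region.map_more(1024).unwrap();
    assert_eq!(region.mapped_bytes(), 1024);
}
```

The trade-off is that aliasing is checked at runtime: `borrow_mut()` panics if the mutable state is already borrowed, which is why each method in the patch takes the borrow once and holds it for the whole operation, as `write_byte` does below.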
@@ -57,14 +69,39 @@ pub trait Allocator { fn mark_unused(&mut self, ptr: *const u8, size: u32) -> bool; } -/// Pointer into a [VirtualMemory]. -/// We may later change this to wrap an u32. -/// Note: there is no NULL constant for CodePtr. You should use Option<CodePtr> instead. +/// Pointer into a [VirtualMemory] represented as an offset from the base. +/// Note: there is no NULL constant for [CodePtr]. You should use `Option<CodePtr>` instead. #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)] #[repr(C, packed)] -pub struct CodePtr(NonNull<u8>); +pub struct CodePtr(u32); impl CodePtr { + /// Advance the CodePtr. Can return a dangling pointer. + pub fn add_bytes(self, bytes: usize) -> Self { + let CodePtr(raw) = self; + let bytes: u32 = bytes.try_into().unwrap(); + CodePtr(raw + bytes) + } + + /// Note that the raw pointer might be dangling if there hasn't + /// been any writes to it through the [VirtualMemory] yet. + pub fn raw_ptr(self, base: &impl CodePtrBase) -> *const u8 { + let CodePtr(offset) = self; + return base.base_ptr().as_ptr().wrapping_add(offset.as_usize()) + } + + /// Get the address of the code pointer. + pub fn raw_addr(self, base: &impl CodePtrBase) -> usize { + self.raw_ptr(base) as usize + } + + /// Get the offset component for the code pointer. Useful finding the distance between two + /// code pointers that share the same [VirtualMem]. + pub fn as_offset(self) -> i64 { + let CodePtr(offset) = self; + offset.into() + } + pub fn as_side_exit(self) -> Target { Target::SideExitPtr(self) } @@ -81,33 +118,46 @@ use WriteError::*; impl<A: Allocator> VirtualMemory<A> { /// Bring a part of the address space under management. - pub fn new(allocator: A, page_size: u32, virt_region_start: NonNull<u8>, size_bytes: usize) -> Self { + pub fn new( + allocator: A, + page_size: u32, + virt_region_start: NonNull<u8>, + region_size_bytes: usize, + memory_limit_bytes: usize, + ) -> Self { assert_ne!(0, page_size); let page_size_bytes = page_size.as_usize(); Self { region_start: virt_region_start, - region_size_bytes: size_bytes, + region_size_bytes, + memory_limit_bytes, page_size_bytes, - mapped_region_bytes: 0, - current_write_page: None, - allocator, + mutable: RefCell::new(VirtualMemoryMut { + mapped_region_bytes: 0, + current_write_page: None, + allocator, + }), } } /// Return the start of the region as a raw pointer. Note that it could be a dangling /// pointer so be careful dereferencing it. pub fn start_ptr(&self) -> CodePtr { - CodePtr(self.region_start) + CodePtr(0) } - pub fn end_ptr(&self) -> CodePtr { - CodePtr(NonNull::new(self.region_start.as_ptr().wrapping_add(self.mapped_region_bytes)).unwrap()) + pub fn mapped_end_ptr(&self) -> CodePtr { + self.start_ptr().add_bytes(self.mutable.borrow().mapped_region_bytes) + } + + pub fn virtual_end_ptr(&self) -> CodePtr { + self.start_ptr().add_bytes(self.region_size_bytes) } /// Size of the region in bytes that we have allocated physical memory for. pub fn mapped_region_size(&self) -> usize { - self.mapped_region_bytes + self.mutable.borrow().mapped_region_bytes } /// Size of the region in bytes where writes could be attempted. @@ -115,20 +165,28 @@ impl<A: Allocator> VirtualMemory<A> { self.region_size_bytes } + /// The granularity at which we can control memory permission. + /// On Linux, this is the page size that mmap(2) talks about. + pub fn system_page_size(&self) -> usize { + self.page_size_bytes + } + /// Write a single byte. The first write to a page makes it readable. 
- pub fn write_byte(&mut self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> { + pub fn write_byte(&self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> { + let mut mutable = self.mutable.borrow_mut(); + let page_size = self.page_size_bytes; - let raw: *mut u8 = write_ptr.raw_ptr() as *mut u8; + let raw: *mut u8 = write_ptr.raw_ptr(self) as *mut u8; let page_addr = (raw as usize / page_size) * page_size; - if self.current_write_page == Some(page_addr) { + if mutable.current_write_page == Some(page_addr) { // Writing within the last written to page, nothing to do } else { // Switching to a different and potentially new page let start = self.region_start.as_ptr(); - let mapped_region_end = start.wrapping_add(self.mapped_region_bytes); + let mapped_region_end = start.wrapping_add(mutable.mapped_region_bytes); let whole_region_end = start.wrapping_add(self.region_size_bytes); - let alloc = &mut self.allocator; + let alloc = &mut mutable.allocator; assert!((start..=whole_region_end).contains(&mapped_region_end)); @@ -140,8 +198,9 @@ impl<A: Allocator> VirtualMemory<A> { return Err(FailedPageMapping); } - self.current_write_page = Some(page_addr); - } else if (start..whole_region_end).contains(&raw) { + mutable.current_write_page = Some(page_addr); + } else if (start..whole_region_end).contains(&raw) && + (page_addr + page_size - start as usize) + yjit_alloc_size() < self.memory_limit_bytes { // Writing to a brand new page let mapped_region_end_addr = mapped_region_end as usize; let alloc_size = page_addr - mapped_region_end_addr + page_size; @@ -171,9 +230,9 @@ impl<A: Allocator> VirtualMemory<A> { unreachable!("unknown arch"); } } - self.mapped_region_bytes = self.mapped_region_bytes + alloc_size; + mutable.mapped_region_bytes = mutable.mapped_region_bytes + alloc_size; - self.current_write_page = Some(page_addr); + mutable.current_write_page = Some(page_addr); } else { return Err(OutOfBounds); } @@ -185,60 +244,66 @@ impl<A: Allocator> VirtualMemory<A> { Ok(()) } - /// Make all the code in the region executable. Call this at the end of a write session. - /// See [Self] for usual usage flow. - pub fn mark_all_executable(&mut self) { - self.current_write_page = None; + /// Make all the code in the region writeable. + /// Call this during GC before the phase of updating reference fields. + pub fn mark_all_writeable(&self) { + let mut mutable = self.mutable.borrow_mut(); + + mutable.current_write_page = None; let region_start = self.region_start; - let mapped_region_bytes: u32 = self.mapped_region_bytes.try_into().unwrap(); + let mapped_region_bytes: u32 = mutable.mapped_region_bytes.try_into().unwrap(); // Make mapped region executable - self.allocator.mark_executable(region_start.as_ptr(), mapped_region_bytes); + if !mutable.allocator.mark_writable(region_start.as_ptr(), mapped_region_bytes) { + panic!("Cannot make memory region writable: {:?}-{:?}", + region_start.as_ptr(), + unsafe { region_start.as_ptr().add(mapped_region_bytes as usize)} + ); + } } - /// Free a range of bytes. start_ptr must be memory page-aligned. - pub fn free_bytes(&mut self, start_ptr: CodePtr, size: u32) { - assert_eq!(start_ptr.into_usize() % self.page_size_bytes, 0); - self.allocator.mark_unused(start_ptr.0.as_ptr(), size); - } -} + /// Make all the code in the region executable. Call this at the end of a write session. + /// See [Self] for usual usage flow. 
+ pub fn mark_all_executable(&self) { + let mut mutable = self.mutable.borrow_mut(); -impl CodePtr { - /// Note that the raw pointer might be dangling if there hasn't - /// been any writes to it through the [VirtualMemory] yet. - pub fn raw_ptr(self) -> *const u8 { - let CodePtr(ptr) = self; - return ptr.as_ptr(); - } + mutable.current_write_page = None; - /// Advance the CodePtr. Can return a dangling pointer. - pub fn add_bytes(self, bytes: usize) -> Self { - let CodePtr(raw) = self; - CodePtr(NonNull::new(raw.as_ptr().wrapping_add(bytes)).unwrap()) - } + let region_start = self.region_start; + let mapped_region_bytes: u32 = mutable.mapped_region_bytes.try_into().unwrap(); - pub fn into_i64(self) -> i64 { - let CodePtr(ptr) = self; - ptr.as_ptr() as i64 + // Make mapped region executable + mutable.allocator.mark_executable(region_start.as_ptr(), mapped_region_bytes); } - #[cfg(target_arch = "aarch64")] - pub fn into_u64(self) -> u64 { - let CodePtr(ptr) = self; - ptr.as_ptr() as u64 + /// Free a range of bytes. start_ptr must be memory page-aligned. + pub fn free_bytes(&self, start_ptr: CodePtr, size: u32) { + assert_eq!(start_ptr.raw_ptr(self) as usize % self.page_size_bytes, 0); + + // Bounds check the request. We should only free memory we manage. + let mapped_region = self.start_ptr().raw_ptr(self)..self.mapped_end_ptr().raw_ptr(self); + let virtual_region = self.start_ptr().raw_ptr(self)..self.virtual_end_ptr().raw_ptr(self); + let last_byte_to_free = start_ptr.add_bytes(size.saturating_sub(1).as_usize()).raw_ptr(self); + assert!(mapped_region.contains(&start_ptr.raw_ptr(self))); + // On platforms where code page size != memory page size (e.g. Linux), we often need + // to free code pages that contain unmapped memory pages. When it happens on the last + // code page, it's more appropriate to check the last byte against the virtual region. + assert!(virtual_region.contains(&last_byte_to_free)); + + let mut mutable = self.mutable.borrow_mut(); + mutable.allocator.mark_unused(start_ptr.raw_ptr(self), size); } +} - pub fn into_usize(self) -> usize { - let CodePtr(ptr) = self; - ptr.as_ptr() as usize - } +/// Something that could provide a base pointer to compute a raw pointer from a [CodePtr]. 
+pub trait CodePtrBase { + fn base_ptr(&self) -> NonNull<u8>; } -impl From<*mut u8> for CodePtr { - fn from(value: *mut u8) -> Self { - assert!(value as usize != 0); - return CodePtr(NonNull::new(value).unwrap()); +impl<A: Allocator> CodePtrBase for VirtualMemory<A> { + fn base_ptr(&self) -> NonNull<u8> { + self.region_start } } @@ -254,15 +319,15 @@ mod sys { impl super::Allocator for SystemAllocator { fn mark_writable(&mut self, ptr: *const u8, size: u32) -> bool { - unsafe { rb_yjit_mark_writable(ptr as VoidPtr, size) } + unsafe { rb_jit_mark_writable(ptr as VoidPtr, size) } } fn mark_executable(&mut self, ptr: *const u8, size: u32) { - unsafe { rb_yjit_mark_executable(ptr as VoidPtr, size) } + unsafe { rb_jit_mark_executable(ptr as VoidPtr, size) } } fn mark_unused(&mut self, ptr: *const u8, size: u32) -> bool { - unsafe { rb_yjit_mark_unused(ptr as VoidPtr, size) } + unsafe { rb_jit_mark_unused(ptr as VoidPtr, size) } } } } @@ -349,17 +414,18 @@ pub mod tests { PAGE_SIZE.try_into().unwrap(), NonNull::new(mem_start as *mut u8).unwrap(), mem_size, + get_option!(mem_size), ) } #[test] #[cfg(target_arch = "x86_64")] fn new_memory_is_initialized() { - let mut virt = new_dummy_virt_mem(); + let virt = new_dummy_virt_mem(); virt.write_byte(virt.start_ptr(), 1).unwrap(); assert!( - virt.allocator.memory[..PAGE_SIZE].iter().all(|&byte| byte != 0), + virt.mutable.borrow().allocator.memory[..PAGE_SIZE].iter().all(|&byte| byte != 0), "Entire page should be initialized", ); @@ -367,21 +433,21 @@ pub mod tests { let three_pages = 3 * PAGE_SIZE; virt.write_byte(virt.start_ptr().add_bytes(three_pages), 1).unwrap(); assert!( - virt.allocator.memory[..three_pages].iter().all(|&byte| byte != 0), + virt.mutable.borrow().allocator.memory[..three_pages].iter().all(|&byte| byte != 0), "Gaps between write requests should be filled", ); } #[test] fn no_redundant_syscalls_when_writing_to_the_same_page() { - let mut virt = new_dummy_virt_mem(); + let virt = new_dummy_virt_mem(); virt.write_byte(virt.start_ptr(), 1).unwrap(); virt.write_byte(virt.start_ptr(), 0).unwrap(); assert!( matches!( - virt.allocator.requests[..], + virt.mutable.borrow().allocator.requests[..], [MarkWritable { start_idx: 0, length: PAGE_SIZE }], ) ); @@ -390,12 +456,12 @@ pub mod tests { #[test] fn bounds_checking() { use super::WriteError::*; - let mut virt = new_dummy_virt_mem(); + let virt = new_dummy_virt_mem(); let one_past_end = virt.start_ptr().add_bytes(virt.virtual_region_size()); assert_eq!(Err(OutOfBounds), virt.write_byte(one_past_end, 0)); - let end_of_addr_space = CodePtr(NonNull::new(usize::MAX as _).unwrap()); + let end_of_addr_space = CodePtr(u32::MAX); assert_eq!(Err(OutOfBounds), virt.write_byte(end_of_addr_space, 0)); } @@ -403,7 +469,7 @@ pub mod tests { fn only_written_to_regions_become_executable() { // ... 
so we catch attempts to read/write/execute never-written-to regions const THREE_PAGES: usize = PAGE_SIZE * 3; - let mut virt = new_dummy_virt_mem(); + let virt = new_dummy_virt_mem(); let page_two_start = virt.start_ptr().add_bytes(PAGE_SIZE * 2); virt.write_byte(page_two_start, 1).unwrap(); virt.mark_all_executable(); @@ -411,7 +477,7 @@ pub mod tests { assert!(virt.virtual_region_size() > THREE_PAGES); assert!( matches!( - virt.allocator.requests[..], + virt.mutable.borrow().allocator.requests[..], [ MarkWritable { start_idx: 0, length: THREE_PAGES }, MarkExecutable { start_idx: 0, length: THREE_PAGES }, diff --git a/yjit/src/yjit.rs b/yjit/src/yjit.rs index 4850dca7a8..517a0daae5 100644 --- a/yjit/src/yjit.rs +++ b/yjit/src/yjit.rs @@ -4,77 +4,171 @@ use crate::cruby::*; use crate::invariants::*; use crate::options::*; use crate::stats::YjitExitLocations; +use crate::stats::incr_counter; +use crate::stats::with_compile_time; -use std::os::raw; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::os::raw::{c_char, c_int}; +use std::time::Instant; +use crate::log::Log; -/// For tracking whether the user enabled YJIT through command line arguments or environment -/// variables. AtomicBool to avoid `unsafe`. On x86 it compiles to simple movs. -/// See <https://doc.rust-lang.org/std/sync/atomic/enum.Ordering.html> -/// See [rb_yjit_enabled_p] -static YJIT_ENABLED: AtomicBool = AtomicBool::new(false); +/// Is YJIT on? The interpreter uses this variable to decide whether to trigger +/// compilation. See jit_exec() and jit_compile(). +#[allow(non_upper_case_globals)] +#[no_mangle] +pub static mut rb_yjit_enabled_p: bool = false; + +// Time when YJIT was yjit was initialized (see yjit_init) +pub static mut YJIT_INIT_TIME: Option<Instant> = None; /// Parse one command-line option. /// This is called from ruby.c #[no_mangle] -pub extern "C" fn rb_yjit_parse_option(str_ptr: *const raw::c_char) -> bool { +pub extern "C" fn rb_yjit_parse_option(str_ptr: *const c_char) -> bool { return parse_option(str_ptr).is_some(); } -/// Is YJIT on? The interpreter uses this function to decide whether to increment -/// ISEQ call counters. See jit_exec(). -/// This is used frequently since it's used on every method call in the interpreter. #[no_mangle] -pub extern "C" fn rb_yjit_enabled_p() -> raw::c_int { - // Note that we might want to call this function from signal handlers so - // might need to ensure signal-safety(7). - YJIT_ENABLED.load(Ordering::Acquire).into() +pub extern "C" fn rb_yjit_option_disable() -> bool { + return get_option!(disable); } /// Like rb_yjit_enabled_p, but for Rust code. pub fn yjit_enabled_p() -> bool { - YJIT_ENABLED.load(Ordering::Acquire) + unsafe { rb_yjit_enabled_p } } -/// After how many calls YJIT starts compiling a method +/// This function is called from C code #[no_mangle] -pub extern "C" fn rb_yjit_call_threshold() -> raw::c_uint { - get_option!(call_threshold) as raw::c_uint +pub extern "C" fn rb_yjit_init(yjit_enabled: bool) { + // Register the method codegen functions. This must be done at boot. + yjit_reg_method_codegen_fns(); + + // If --yjit-disable, yjit_init() will not be called until RubyVM::YJIT.enable. + if yjit_enabled { + yjit_init(); + } } -/// This function is called from C code -#[no_mangle] -pub extern "C" fn rb_yjit_init_rust() { +/// Initialize and enable YJIT. You should call this at boot or with GVL. 
+fn yjit_init() { // TODO: need to make sure that command-line options have been // initialized by CRuby + // Call YJIT hooks before enabling YJIT to avoid compiling the hooks themselves + unsafe { + let yjit = rb_const_get(rb_cRubyVM, rust_str_to_id("YJIT")); + rb_funcall(yjit, rust_str_to_id("call_jit_hooks"), 0); + } + // Catch panics to avoid UB for unwinding into C frames. // See https://doc.rust-lang.org/nomicon/exception-safety.html - // TODO: set a panic handler so the we don't print a message - // everytime we panic. let result = std::panic::catch_unwind(|| { Invariants::init(); CodegenGlobals::init(); YjitExitLocations::init(); + ids::init(); + + rb_bug_panic_hook(); // YJIT enabled and initialized successfully - YJIT_ENABLED.store(true, Ordering::Release); + assert!(unsafe{ !rb_yjit_enabled_p }); + unsafe { rb_yjit_enabled_p = true; } }); if let Err(_) = result { - println!("YJIT: rb_yjit_init_rust() panicked. Aborting."); + println!("YJIT: yjit_init() panicked. Aborting."); std::process::abort(); } + + // Make sure --yjit-perf doesn't append symbols to an old file + if get_option!(perf_map).is_some() { + let perf_map = format!("/tmp/perf-{}.map", std::process::id()); + let _ = std::fs::remove_file(&perf_map); + println!("YJIT perf map: {perf_map}"); + } + + // Note the time when YJIT was initialized + unsafe { + YJIT_INIT_TIME = Some(Instant::now()); + } +} + +#[no_mangle] +pub extern "C" fn rb_yjit_free_at_exit() { + yjit_shutdown_free_codegen_table(); +} + +/// At the moment, we abort in all cases we panic. +/// To aid with getting diagnostics in the wild without requiring +/// people to set RUST_BACKTRACE=1, register a panic hook that crash using rb_bug(). +/// rb_bug() might not be as good at printing a call trace as Rust's stdlib, but +/// it dumps some other info that might be relevant. +/// +/// In case we want to start doing fancier exception handling with panic=unwind, +/// we can revisit this later. For now, this helps to get us good bug reports. +fn rb_bug_panic_hook() { + use std::env; + use std::panic; + use std::io::{stderr, Write}; + + // Probably the default hook. We do this very early during process boot. + let previous_hook = panic::take_hook(); + + panic::set_hook(Box::new(move |panic_info| { + // Not using `eprintln` to avoid double panic. + let _ = stderr().write_all(b"ruby: YJIT has panicked. More info to follow...\n"); + + // Always show a Rust backtrace. + env::set_var("RUST_BACKTRACE", "1"); + previous_hook(panic_info); + + // Abort with rb_bug(). It has a length limit on the message. + let panic_message = &format!("{}", panic_info)[..]; + let len = std::cmp::min(0x100, panic_message.len()) as c_int; + unsafe { rb_bug(b"YJIT: %*s\0".as_ref().as_ptr() as *const c_char, len, panic_message.as_ptr()); } + })); } /// Called from C code to begin compiling a function /// NOTE: this should be wrapped in RB_VM_LOCK_ENTER(), rb_vm_barrier() on the C side +/// If jit_exception is true, compile JIT code for handling exceptions. +/// See jit_compile_exception() for details. 
#[no_mangle] -pub extern "C" fn rb_yjit_iseq_gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> *const u8 { - let maybe_code_ptr = gen_entry_point(iseq, ec); +pub extern "C" fn rb_yjit_iseq_gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> *const u8 { + // Don't compile when there is insufficient native stack space + if unsafe { rb_ec_stack_check(ec as _) } != 0 { + return std::ptr::null(); + } + + // Reject ISEQs with very large temp stacks, + // this will allow us to use u8/i8 values to track stack_size and sp_offset + let stack_max = unsafe { rb_get_iseq_body_stack_max(iseq) }; + if stack_max >= i8::MAX as u32 { + incr_counter!(iseq_stack_too_large); + return std::ptr::null(); + } + + // Reject ISEQs that are too long, + // this will allow us to use u16 for instruction indices if we want to, + // very long ISEQs are also much more likely to be initialization code + let iseq_size = unsafe { get_iseq_encoded_size(iseq) }; + if iseq_size >= u16::MAX as u32 { + incr_counter!(iseq_too_long); + return std::ptr::null(); + } + + // If a custom call threshold was not specified on the command-line and + // this is a large application (has very many ISEQs), switch to + // using the call threshold for large applications after this entry point + use crate::stats::rb_yjit_live_iseq_count; + if unsafe { rb_yjit_call_threshold } == SMALL_CALL_THRESHOLD && unsafe { rb_yjit_live_iseq_count } > LARGE_ISEQ_COUNT { + unsafe { rb_yjit_call_threshold = LARGE_CALL_THRESHOLD; }; + } + + let maybe_code_ptr = with_compile_time(|| { gen_entry_point(iseq, ec, jit_exception) }); match maybe_code_ptr { - Some(ptr) => ptr.raw_ptr(), + Some(ptr) => ptr, None => std::ptr::null(), } } @@ -86,11 +180,67 @@ pub extern "C" fn rb_yjit_code_gc(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { return Qnil; } - let cb = CodegenGlobals::get_inline_cb(); - cb.code_gc(); + with_vm_lock(src_loc!(), || { + let cb = CodegenGlobals::get_inline_cb(); + let ocb = CodegenGlobals::get_outlined_cb(); + cb.code_gc(ocb); + }); + Qnil } +/// Enable YJIT compilation, returning true if YJIT was previously disabled +#[no_mangle] +pub extern "C" fn rb_yjit_enable(_ec: EcPtr, _ruby_self: VALUE, gen_stats: VALUE, print_stats: VALUE, gen_log: VALUE, print_log: VALUE, mem_size: VALUE, call_threshold: VALUE) -> VALUE { + with_vm_lock(src_loc!(), || { + + if !mem_size.nil_p() { + let mem_size_mb = mem_size.as_isize() >> 1; + let mem_size_bytes = mem_size_mb * 1024 * 1024; + unsafe { + OPTIONS.mem_size = mem_size_bytes as usize; + } + } + + if !call_threshold.nil_p() { + let threshold = call_threshold.as_isize() >> 1; + unsafe { + rb_yjit_call_threshold = threshold as u64; + } + } + + // Initialize and enable YJIT + if gen_stats.test() { + unsafe { + OPTIONS.gen_stats = gen_stats.test(); + OPTIONS.print_stats = print_stats.test(); + } + } + + if gen_log.test() { + unsafe { + if print_log.test() { + OPTIONS.log = Some(LogOutput::Stderr); + } else { + OPTIONS.log = Some(LogOutput::MemoryOnly); + } + + Log::init(); + } + } + + yjit_init(); + + // Add "+YJIT" to RUBY_DESCRIPTION + extern "C" { + fn ruby_set_yjit_description(); + } + unsafe { ruby_set_yjit_description(); } + + Qtrue + }) +} + /// Simulate a situation where we are out of executable memory #[no_mangle] pub extern "C" fn rb_yjit_simulate_oom_bang(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { @@ -109,3 +259,19 @@ pub extern "C" fn rb_yjit_simulate_oom_bang(_ec: EcPtr, _ruby_self: VALUE) -> VA return Qnil; } + +/// Push a C method frame if the given PC is supposed to lazily push one. 
+/// This is called from rb_raise() (at rb_exc_new_str()) and other functions
+/// that may make a method call (e.g. rb_to_int()).
+#[no_mangle]
+pub extern "C" fn rb_yjit_lazy_push_frame(pc: *mut VALUE) {
+    if !yjit_enabled_p() {
+        return;
+    }
+
+    incr_counter!(num_lazy_frame_check);
+    if let Some(&(cme, recv_idx)) = CodegenGlobals::get_pc_to_cfunc().get(&pc) {
+        incr_counter!(num_lazy_frame_push);
+        unsafe { rb_vm_push_cfunc_frame(cme, recv_idx as i32) }
+    }
+}
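One yjit.rs pattern above is worth spelling out: `yjit_init()` wraps all Rust-side setup in `std::panic::catch_unwind` and aborts on failure, because letting a panic unwind into C frames is undefined behavior. A minimal sketch of that catch-at-the-boundary idea, with an invented `init_subsystems` standing in for the real `Invariants`/`CodegenGlobals` setup:

```rust
use std::panic;
use std::process;

/// Stand-in for the real initialization work that might panic.
fn init_subsystems() {
    // ... setup goes here ...
}

/// Called from a C entry point: never let a panic unwind past this function.
fn boundary_safe_init() {
    let result = panic::catch_unwind(|| {
        init_subsystems();
    });

    if result.is_err() {
        // Unwinding into C frames is undefined behavior, so abort instead.
        println!("init panicked, aborting");
        process::abort();
    }
}

fn main() {
    boundary_safe_init();
}
```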

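Finally, `rb_yjit_iseq_gen_entry_point` above wraps code generation in `with_compile_time()`, which times a closure and accumulates the elapsed nanoseconds into the `compile_time_ns` counter. A sketch of that idea in isolation, using an `AtomicU64` for the counter (YJIT itself bumps its generated `COUNTERS` struct under the VM lock) and an invented `timed` helper:

```rust
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::Instant;

/// Total time spent inside `timed`, in nanoseconds.
static TIME_SPENT_NS: AtomicU64 = AtomicU64::new(0);

/// Run `func`, add its wall-clock duration to the counter, and return its result.
fn timed<F, R>(func: F) -> R
where
    F: FnOnce() -> R,
{
    let start = Instant::now();
    let ret = func();
    TIME_SPENT_NS.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
    ret
}

fn main() {
    let value = timed(|| (0..1_000u64).sum::<u64>());
    println!("sum = {value}, spent = {} ns", TIME_SPENT_NS.load(Ordering::Relaxed));
}
```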