author     Kevin Newton <kddnewton@gmail.com>       2022-07-08 13:01:21 -0400
committer  Takashi Kokubun <takashikkbn@gmail.com>  2022-08-29 08:46:59 -0700
commit     6773832ab9cad3c7bcb3b93ef85a4bcfc9b3a4e3
tree       b377f91fb5597d0ee141485205a2638cb35f75c8
parent     0551115912fd6682187dd501275096fdb7570084
More Arm64 lowering/backend work (https://github.com/Shopify/ruby/pull/307)
* More Arm64 lowering/backend work
* We now have encoding support for the LDR (literal) instruction, which loads a value from a PC-relative memory location (see the first sketch after this list).
* You can now call add/adds/sub/subs with signed immediates; each switches to the complementary instruction when the immediate is negative (see the second sketch after this list).
* We can now load immediates into registers using a minimal number of instructions (see the third sketch after this list):
  * If the value fits into 16 bits, we use a single movz.
  * Otherwise, if it can be encoded as a bitmask immediate, we use a single mov.
  * Otherwise we use a movz, a movk, and then optionally one or two more movks.
* Fixed several issues with the Op::Load opcode.
* We now handle GC offsets properly for Op::Load by embedding the value in the instruction stream and skipping over it with a jump instruction. (This will be improved by constant pools in the future.)
* Op::Lea now does what it's supposed to do: it computes an effective address instead of loading from one.
* Fixed a bug in the backend tests where the result of an Op::Add was never used.
* Fixed the remaining tests for Arm64.
* Moved the split-loads logic into each backend.
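The sketches referenced above follow. First, a self-contained illustration of the LDR (literal) bit packing that the new LoadLiteral struct performs. This is not YJIT's API, just a hypothetical standalone encoder, but the expected 32-bit words come from the commit's own unit tests:

    // opc (0b01 = 64-bit) sits in bits 30-31, a fixed bit at 28, the
    // loads-and-stores family (0b0100) starts at bit 25, imm19 (a signed
    // word offset) occupies bits 5-23, and rt takes the low five bits.
    fn ldr_literal_64(rt: u8, imm19: i32) -> u32 {
        assert!(rt < 32, "rt must be a valid register number");
        assert!((-(1 << 18)..(1 << 18)).contains(&imm19), "offset must fit in 19 bits");

        let imm19 = (imm19 as u32) & ((1 << 19) - 1);

        (0b01 << 30) | (1 << 28) | (0b0100 << 25) | (imm19 << 5) | (rt as u32)
    }

    fn main() {
        // Expected encodings taken from the commit's tests below.
        assert_eq!(ldr_literal_64(0, 5), 0x580000a0);
        assert_eq!(ldr_literal_64(0, -5), 0x58ffff60);
    }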
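Second, the signed-immediate switch. It relies on the fact that the ADD/SUB immediate field is unsigned and that add rd, rn, #-n computes the same result as sub rd, rn, #n. The helper below is hypothetical and returns assembly text rather than machine code:

    // Negative immediates flip ADD to SUB (and, in the real backend,
    // SUB flips to ADD symmetrically).
    fn add_imm(rd: u8, rn: u8, imm12: i64) -> String {
        // Only the magnitude has to fit, since the encoded field is a
        // 12-bit unsigned value.
        assert!(imm12 > -(1 << 12) && imm12 < (1 << 12), "magnitude must fit in 12 bits");

        if imm12 < 0 {
            format!("sub x{}, x{}, #{}", rd, rn, -imm12)
        } else {
            format!("add x{}, x{}, #{}", rd, rn, imm12)
        }
    }

    fn main() {
        assert_eq!(add_imm(0, 1, 7), "add x0, x1, #7");
        assert_eq!(add_imm(0, 1, -7), "sub x0, x1, #7");
    }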
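Third, the immediate-loading decision. This sketch also emits assembly text instead of bytes; is_bitmask_imm stands in for the real BitmaskImmediate::try_from check, and unlike the committed emit_load_value it skips 16-bit chunks that are all zero (movz has already cleared them), so treat it as an outline of the strategy rather than the exact implementation:

    // Choose the shortest mov sequence that materializes a 64-bit value.
    fn load_u64(rd: u8, value: u64, is_bitmask_imm: impl Fn(u64) -> bool) -> Vec<String> {
        if value <= 0xffff {
            // Fits in 16 bits: a single movz is enough.
            return vec![format!("movz x{}, #{:#x}", rd, value)];
        }

        if is_bitmask_imm(value) {
            // Encodable as a bitmask immediate: a single mov (an ORR alias).
            return vec![format!("mov x{}, #{:#x}", rd, value)];
        }

        // Otherwise: movz for the low 16 bits, then a movk for each
        // remaining non-zero 16-bit chunk.
        let mut insns = vec![format!("movz x{}, #{:#x}", rd, value & 0xffff)];
        for shift in [16u32, 32, 48] {
            let chunk = (value >> shift) & 0xffff;
            if chunk != 0 {
                insns.push(format!("movk x{}, #{:#x}, lsl {}", rd, chunk, shift));
            }
        }
        insns
    }

    fn main() {
        // With no bitmask encoding available, this needs a movz plus two movks.
        for insn in load_u64(0, 0x1_2345_0000, |_| false) {
            println!("{}", insn);
        }
    }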
Diffstat:
 yjit/src/asm/arm64/inst/load_literal.rs |  89
 yjit/src/asm/arm64/inst/mod.rs          |   2
 yjit/src/asm/arm64/mod.rs               | 131
 yjit/src/asm/arm64/opnd.rs              |   1
 yjit/src/asm/mod.rs                     |   2
 yjit/src/backend/arm64/mod.rs           | 172
 yjit/src/backend/ir.rs                  |  65
 yjit/src/backend/tests.rs               |  27
 yjit/src/backend/x86_64/mod.rs          | 105
9 files changed, 431 insertions, 163 deletions
diff --git a/yjit/src/asm/arm64/inst/load_literal.rs b/yjit/src/asm/arm64/inst/load_literal.rs
new file mode 100644
index 0000000000..a49130c3eb
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/load_literal.rs
@@ -0,0 +1,89 @@
+/// The size of the operands being operated on.
+enum Opc {
+    Size32 = 0b00,
+    Size64 = 0b01,
+}
+
+/// A convenience function so that we can convert the number of bits of a
+/// register operand directly into an Opc enum variant.
+impl From<u8> for Opc {
+    fn from(num_bits: u8) -> Self {
+        match num_bits {
+            64 => Opc::Size64,
+            32 => Opc::Size32,
+            _ => panic!("Invalid number of bits: {}", num_bits)
+        }
+    }
+}
+
+/// The struct that represents an A64 load literal instruction that can be encoded.
+///
+/// LDR
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// |         0 1   1  0  0  0                                                                                       |
+/// | opc..               imm19...........................................................  rt..............         |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct LoadLiteral {
+    /// The number of the register to load the value into.
+    rt: u8,
+
+    /// The PC-relative number of instructions to load the value from.
+    imm19: i32,
+
+    /// The size of the operands being operated on.
+    opc: Opc
+}
+
+impl LoadLiteral {
+    /// LDR (load literal)
+    /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--literal---Load-Register--literal--?lang=en
+    pub fn ldr(rt: u8, imm19: i32, num_bits: u8) -> Self {
+        Self { rt, imm19, opc: num_bits.into() }
+    }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
+const FAMILY: u32 = 0b0100;
+
+impl From<LoadLiteral> for u32 {
+    /// Convert an instruction into a 32-bit value.
+    fn from(inst: LoadLiteral) -> Self {
+        let imm19 = (inst.imm19 as u32) & ((1 << 19) - 1);
+
+        0
+        | ((inst.opc as u32) << 30)
+        | (1 << 28)
+        | (FAMILY << 25)
+        | (imm19 << 5)
+        | (inst.rt as u32)
+    }
+}
+
+impl From<LoadLiteral> for [u8; 4] {
+    /// Convert an instruction into a 4 byte array.
+    fn from(inst: LoadLiteral) -> [u8; 4] {
+        let result: u32 = inst.into();
+        result.to_le_bytes()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_ldr_positive() {
+        let inst = LoadLiteral::ldr(0, 5, 64);
+        let result: u32 = inst.into();
+        assert_eq!(0x580000a0, result);
+    }
+
+    #[test]
+    fn test_ldr_negative() {
+        let inst = LoadLiteral::ldr(0, -5, 64);
+        let result: u32 = inst.into();
+        assert_eq!(0x58ffff60, result);
+    }
+}
diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs
index ae589ca564..f402f6765a 100644
--- a/yjit/src/asm/arm64/inst/mod.rs
+++ b/yjit/src/asm/arm64/inst/mod.rs
@@ -9,6 +9,7 @@ mod call;
 mod data_imm;
 mod data_reg;
 mod load;
+mod load_literal;
 mod logical_imm;
 mod logical_reg;
 mod mov;
@@ -24,6 +25,7 @@ pub use call::Call;
 pub use data_imm::DataImm;
 pub use data_reg::DataReg;
 pub use load::Load;
+pub use load_literal::LoadLiteral;
 pub use logical_imm::LogicalImm;
 pub use logical_reg::LogicalReg;
 pub use mov::Mov;
diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs
index ced8b262c5..2dc5aa9388 100644
--- a/yjit/src/asm/arm64/mod.rs
+++ b/yjit/src/asm/arm64/mod.rs
@@ -39,11 +39,21 @@ pub fn add(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
             DataReg::add(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into()
         },
-        (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => {
+        (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => {
             assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
-            assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less.");
+            assert!(uimm_fits_bits(uimm12, 12), "The immediate operand must be 12 bits or less.");
 
-            DataImm::add(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into()
+            DataImm::add(rd.reg_no, rn.reg_no, uimm12 as u16, rd.num_bits).into()
+        },
+        (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => {
+            assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+            assert!(imm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less.");
+
+            if imm12 < 0 {
+                DataImm::sub(rd.reg_no, rn.reg_no, -imm12 as u16, rd.num_bits).into()
+            } else {
+                DataImm::add(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into()
+            }
         },
         _ => panic!("Invalid operand combination to add instruction."),
     };
@@ -68,6 +78,16 @@ pub fn adds(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
             DataImm::adds(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into()
         },
+        (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => {
+            assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+            assert!(imm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less.");
+
+            if imm12 < 0 {
+                DataImm::subs(rd.reg_no, rn.reg_no, -imm12 as u16, rd.num_bits).into()
+            } else {
+                DataImm::adds(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into()
+            }
+        },
         _ => panic!("Invalid operand combination to adds instruction."),
     };
@@ -237,6 +257,18 @@ pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) {
     cb.write_bytes(&bytes);
 }
 
+/// LDR - load a PC-relative memory address into a register
+pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) {
+    let bytes: [u8; 4] = match rt {
+        A64Opnd::Reg(rt) => {
+            LoadLiteral::ldr(rt.reg_no, rn, rt.num_bits).into()
+        },
+        _ => panic!("Invalid operand combination to ldr instruction."),
+    };
+
+    cb.write_bytes(&bytes);
+}
+
 /// LDUR - load a memory address into a register
 pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd)
 {
     let bytes: [u8; 4] = match (rt, rn) {
@@ -415,11 +447,21 @@ pub fn sub(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
             DataReg::sub(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into()
         },
-        (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => {
+        (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => {
             assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
-            assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less.");
+            assert!(uimm_fits_bits(uimm12, 12), "The immediate operand must be 12 bits or less.");
 
-            DataImm::sub(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into()
+            DataImm::sub(rd.reg_no, rn.reg_no, uimm12 as u16, rd.num_bits).into()
+        },
+        (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => {
+            assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+            assert!(imm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less.");
+
+            if imm12 < 0 {
+                DataImm::add(rd.reg_no, rn.reg_no, -imm12 as u16, rd.num_bits).into()
+            } else {
+                DataImm::sub(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into()
+            }
         },
         _ => panic!("Invalid operand combination to sub instruction."),
     };
@@ -438,11 +480,21 @@ pub fn subs(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
             DataReg::subs(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into()
         },
-        (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => {
+        (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => {
             assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
-            assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less.");
+            assert!(uimm_fits_bits(uimm12, 12), "The immediate operand must be 12 bits or less.");
+
+            DataImm::subs(rd.reg_no, rn.reg_no, uimm12 as u16, rd.num_bits).into()
+        },
+        (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => {
+            assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+            assert!(imm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less.");
 
-            DataImm::subs(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into()
+            if imm12 < 0 {
+                DataImm::adds(rd.reg_no, rn.reg_no, -imm12 as u16, rd.num_bits).into()
+            } else {
+                DataImm::subs(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into()
+            }
         },
         _ => panic!("Invalid operand combination to subs instruction."),
     };
@@ -513,26 +565,46 @@ mod tests {
     }
 
     #[test]
-    fn test_add_register() {
+    fn test_add_reg() {
         check_bytes("2000028b", |cb| add(cb, X0, X1, X2));
     }
 
     #[test]
-    fn test_add_immediate() {
+    fn test_add_uimm() {
         check_bytes("201c0091", |cb| add(cb, X0, X1, A64Opnd::new_uimm(7)));
     }
 
     #[test]
-    fn test_adds_register() {
+    fn test_add_imm_positive() {
+        check_bytes("201c0091", |cb| add(cb, X0, X1, A64Opnd::new_imm(7)));
+    }
+
+    #[test]
+    fn test_add_imm_negative() {
+        check_bytes("201c00d1", |cb| add(cb, X0, X1, A64Opnd::new_imm(-7)));
+    }
+
+    #[test]
+    fn test_adds_reg() {
         check_bytes("200002ab", |cb| adds(cb, X0, X1, X2));
     }
 
     #[test]
-    fn test_adds_immediate() {
+    fn test_adds_uimm() {
         check_bytes("201c00b1", |cb| adds(cb, X0, X1, A64Opnd::new_uimm(7)));
     }
 
     #[test]
+    fn test_adds_imm_positive() {
+        check_bytes("201c00b1", |cb| adds(cb, X0, X1, A64Opnd::new_imm(7)));
+    }
+
+    #[test]
+    fn test_adds_imm_negative() {
+        check_bytes("201c00f1", |cb| adds(cb, X0, X1, A64Opnd::new_imm(-7)));
+    }
+
+    #[test]
     fn test_and_register() {
         check_bytes("2000028a", |cb| and(cb, X0, X1, X2));
     }
@@ -598,6 +670,11 @@ mod tests {
     }
 
     #[test]
+    fn test_ldr() {
+        check_bytes("40010058", |cb| ldr(cb, X0, 10));
+    }
+
+    #[test]
     fn test_ldur_memory() {
         check_bytes("20b047f8", |cb| ldur(cb, X0, A64Opnd::new_mem(64, X1, 123)));
     }
@@ -678,22 +755,42 @@ mod tests {
     }
 
     #[test]
-    fn test_sub_register() {
+    fn test_sub_reg() {
         check_bytes("200002cb", |cb| sub(cb, X0, X1, X2));
     }
 
     #[test]
-    fn test_sub_immediate() {
+    fn test_sub_uimm() {
         check_bytes("201c00d1", |cb| sub(cb, X0, X1, A64Opnd::new_uimm(7)));
     }
 
     #[test]
-    fn test_subs_register() {
+    fn test_sub_imm_positive() {
+        check_bytes("201c00d1", |cb| sub(cb, X0, X1, A64Opnd::new_imm(7)));
+    }
+
+    #[test]
+    fn test_sub_imm_negative() {
+        check_bytes("201c0091", |cb| sub(cb, X0, X1, A64Opnd::new_imm(-7)));
+    }
+
+    #[test]
+    fn test_subs_reg() {
         check_bytes("200002eb", |cb| subs(cb, X0, X1, X2));
     }
 
     #[test]
-    fn test_subs_immediate() {
+    fn test_subs_imm_positive() {
+        check_bytes("201c00f1", |cb| subs(cb, X0, X1, A64Opnd::new_imm(7)));
+    }
+
+    #[test]
+    fn test_subs_imm_negative() {
+        check_bytes("201c00b1", |cb| subs(cb, X0, X1, A64Opnd::new_imm(-7)));
+    }
+
+    #[test]
+    fn test_subs_uimm() {
         check_bytes("201c00f1", |cb| subs(cb, X0, X1, A64Opnd::new_uimm(7)));
     }
diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs
index 6c06d2db3c..1738f0985c 100644
--- a/yjit/src/asm/arm64/opnd.rs
+++ b/yjit/src/asm/arm64/opnd.rs
@@ -175,3 +175,4 @@ pub const W31: A64Reg = A64Reg { num_bits: 32, reg_no: 31 };
 
 // C argument registers
 pub const C_ARG_REGS: [A64Opnd; 4] = [X0, X1, X2, X3];
+pub const C_ARG_REGREGS: [A64Reg; 4] = [X0_REG, X1_REG, X2_REG, X3_REG];
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
index 5723406aec..126c9a8548 100644
--- a/yjit/src/asm/mod.rs
+++ b/yjit/src/asm/mod.rs
@@ -174,7 +174,7 @@ impl CodeBlock {
     }
 
     /// Write multiple bytes starting from the current position.
-    fn write_bytes(&mut self, bytes: &[u8]) {
+    pub fn write_bytes(&mut self, bytes: &[u8]) {
         for byte in bytes {
             self.write_byte(*byte);
         }
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index 061d21d19b..7e6a187f8f 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -61,10 +61,7 @@ impl Assembler
     /// Get the list of registers from which we can allocate on this platform
     pub fn get_alloc_regs() -> Vec<Reg>
     {
-        vec![
-            X12_REG,
-            X13_REG
-        ]
+        vec![C_RET_REG, X12_REG]
     }
 
     /// Split platform-specific instructions
@@ -75,8 +72,21 @@ impl Assembler
     fn arm64_split(mut self) -> Assembler
     {
         self.forward_pass(|asm, index, op, opnds, target| {
+            // Load all Value operands that aren't already part of a Load
+            // instruction into registers.
+            let opnds = match op {
+                Op::Load => opnds,
+                _ => opnds.into_iter().map(|opnd| {
+                    if let Opnd::Value(_) = opnd {
+                        asm.load(opnd)
+                    } else {
+                        opnd
+                    }
+                }).collect()
+            };
+
             match op {
-                Op::Add | Op::Sub => {
+                Op::Add | Op::And | Op::Sub => {
                     // Check if one of the operands is a register. If it is,
                     // then we'll make that the first operand.
                     match (opnds[0], opnds[1]) {
@@ -95,6 +105,28 @@ impl Assembler
                         }
                     }
                 },
+                Op::CCall => {
+                    assert!(opnds.len() < C_ARG_REGS.len());
+
+                    // For each of the operands we're going to first load them
+                    // into a register and then move them into the correct
+                    // argument register.
+                    for (idx, opnd) in opnds.into_iter().enumerate() {
+                        let value = asm.load(opnd);
+                        asm.mov(Opnd::Reg(C_ARG_REGREGS[idx]), value);
+                    }
+
+                    // Now we push the CCall without any arguments so that it
+                    // just performs the call.
+                    asm.ccall(target.unwrap().unwrap_fun_ptr(), vec![]);
+                },
+                Op::CRet => {
+                    if opnds[0] != Opnd::Reg(C_RET_REG) {
+                        let value = asm.load(opnds[0]);
+                        asm.mov(C_RET_OPND, value);
+                    }
+                    asm.cret(C_RET_OPND);
+                },
                 Op::IncrCounter => {
                     // Every operand to the IncrCounter instruction needs to be
                     // a register once it gets there. So here we're going to load
@@ -154,6 +186,16 @@ impl Assembler
                     asm.store(opnds[0], opnd1);
                 },
+                Op::Test => {
+                    // The value being tested must be in a register, so if it's
+                    // not already one we'll load it first.
+                    let opnd0 = match opnds[0] {
+                        Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[0],
+                        _ => asm.load(opnds[0])
+                    };
+
+                    asm.test(opnd0, opnds[1]);
+                },
                 _ => {
                     asm.push_insn(op, opnds, target);
                 }
@@ -165,6 +207,45 @@ impl Assembler
     /// Returns a list of GC offsets
     pub fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Vec<u32>
     {
+        /// Emit the required instructions to load the given value into the
+        /// given register. Our goal here is to use as few instructions as
+        /// possible to get this value into the register.
+        fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) {
+            let mut current = value;
+
+            if current <= 0xffff {
+                // If the value fits into a single movz
+                // instruction, then we'll use that.
+                movz(cb, rd, A64Opnd::new_uimm(current), 0);
+            } else if BitmaskImmediate::try_from(current).is_ok() {
+                // Otherwise, if the immediate can be encoded
+                // with the special bitmask immediate encoding,
+                // we'll use that.
+                mov(cb, rd, A64Opnd::new_uimm(current));
+            } else {
+                // Finally we'll fall back to encoding the value
+                // using movz for the first 16 bits and movk for
+                // each subsequent set of 16 bits, as long as they
+                // are necessary.
+                movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0);
+
+                // (We're sure this is necessary since we checked
+                // whether it fit into a single movz above.)
+                current >>= 16;
+                movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16);
+
+                if current > 0xffff {
+                    current >>= 16;
+                    movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32);
+                }
+
+                if current > 0xffff {
+                    current >>= 16;
+                    movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48);
+                }
+            }
+        }
+
         /// Emit a conditional jump instruction to a specific target. This is
         /// called when lowering any of the conditional jump instructions.
         fn emit_conditional_jump(cb: &mut CodeBlock, condition: Condition, target: Target) {
@@ -203,7 +284,7 @@ impl Assembler
                     // wasn't met, in which case we'll jump past the
                     // next instruction that performs the direct jump.
                     b(cb, A64Opnd::new_imm(8));
-                    mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64));
+                    emit_load_value(cb, X29, dst_addr as u64);
                     br(cb, X29);
                 }
             }
@@ -257,25 +338,57 @@ impl Assembler
                     stur(cb, insn.opnds[1].into(), insn.opnds[0].into());
                 },
                 Op::Load => {
-                    mov(cb, insn.out.into(), insn.opnds[0].into());
-
-                    // This assumes only load instructions can contain
-                    // references to GC'd Value operands. If the value being
-                    // loaded is a heap object, we'll report that back out to
-                    // the gc_offsets list.
-                    if let Opnd::Value(val) = insn.opnds[0] {
-                        if !val.special_const_p() {
-                            // The pointer immediate is encoded as the last part of the mov written out
-                            let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
-                            gc_offsets.push(ptr_offset);
+                    match insn.opnds[0] {
+                        Opnd::Reg(_) | Opnd::InsnOut { .. } => {
+                            mov(cb, insn.out.into(), insn.opnds[0].into());
+                        },
+                        Opnd::UImm(uimm) => {
+                            emit_load_value(cb, insn.out.into(), uimm);
+                        },
+                        Opnd::Imm(imm) => {
+                            emit_load_value(cb, insn.out.into(), imm as u64);
+                        },
+                        Opnd::Mem(_) => {
+                            ldur(cb, insn.out.into(), insn.opnds[0].into());
+                        },
+                        Opnd::Value(value) => {
+                            // This assumes only load instructions can contain
+                            // references to GC'd Value operands. If the value
+                            // being loaded is a heap object, we'll report that
+                            // back out to the gc_offsets list.
+                            ldr(cb, insn.out.into(), 1);
+                            b(cb, A64Opnd::new_uimm((SIZEOF_VALUE as u64) / 4));
+                            cb.write_bytes(&value.as_u64().to_le_bytes());
+
+                            if !value.special_const_p() {
+                                let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
+                                gc_offsets.push(ptr_offset);
+                            }
+                        },
+                        Opnd::None => {
+                            unreachable!("Attempted to load from None operand");
                         }
-                    }
+                    };
                 },
                 Op::Mov => {
                     mov(cb, insn.opnds[0].into(), insn.opnds[1].into());
                 },
                 Op::Lea => {
-                    ldur(cb, insn.out.into(), insn.opnds[0].into());
+                    let opnd: A64Opnd = insn.opnds[0].into();
+
+                    match opnd {
+                        A64Opnd::Mem(mem) => {
+                            add(
+                                cb,
+                                insn.out.into(),
+                                A64Opnd::Reg(A64Reg { reg_no: mem.base_reg_no, num_bits: 64 }),
+                                A64Opnd::new_imm(mem.disp.into())
+                            );
+                        },
+                        _ => {
+                            panic!("Op::Lea only accepts Opnd::Mem operands.");
+                        }
+                    };
                 },
                 Op::CPush => {
                     add(cb, C_SP_REG, C_SP_REG, C_SP_STEP);
@@ -286,14 +399,6 @@ impl Assembler
                     sub(cb, C_SP_REG, C_SP_REG, C_SP_STEP);
                 },
                 Op::CCall => {
-                    // Temporary
-                    assert!(insn.opnds.len() < C_ARG_REGS.len());
-
-                    // For each operand
-                    for (idx, opnd) in insn.opnds.iter().enumerate() {
-                        mov(cb, C_ARG_REGS[idx], insn.opnds[idx].into());
-                    }
-
                     let src_addr = cb.get_write_ptr().into_i64() + 4;
                     let dst_addr = insn.target.unwrap().unwrap_fun_ptr() as i64;
@@ -310,17 +415,12 @@ impl Assembler
                     if b_offset_fits_bits(offset) {
                         bl(cb, A64Opnd::new_imm(offset / 4));
                     } else {
-                        mov(cb, X30, A64Opnd::new_uimm(src_addr as u64));
-                        mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64));
+                        emit_load_value(cb, X30, src_addr as u64);
+                        emit_load_value(cb, X29, dst_addr as u64);
                         br(cb, X29);
                     }
                 },
                 Op::CRet => {
-                    // TODO: bias allocation towards return register
-                    if insn.opnds[0] != Opnd::Reg(C_RET_REG) {
-                        mov(cb, C_RET_OPND.into(), insn.opnds[0].into());
-                    }
-
                     ret(cb, A64Opnd::None);
                 },
                 Op::Cmp => {
@@ -351,7 +451,7 @@ impl Assembler
                     if b_offset_fits_bits(offset) {
                         b(cb, A64Opnd::new_imm(offset / 4));
                     } else {
-                        mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64));
+                        emit_load_value(cb, X29, dst_addr as u64);
                         br(cb, X29);
                     }
                 },
@@ -398,7 +498,7 @@ impl Assembler
     /// Optimize and compile the stored instructions
     pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>) -> Vec<u32>
     {
-        let mut asm = self.arm64_split().split_loads().alloc_regs(regs);
+        let mut asm = self.arm64_split().alloc_regs(regs);
 
         // Create label instances in the code block
         for (idx, name) in asm.label_names.iter().enumerate() {
diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs
index c9e75df01a..cd88ec560b 100644
--- a/yjit/src/backend/ir.rs
+++ b/yjit/src/backend/ir.rs
@@ -534,71 +534,6 @@ impl Assembler
         asm
     }
 
-    /// Transforms the instructions by splitting instructions that cannot be
-    /// represented in the final architecture into multiple instructions that
-    /// can.
-    pub(super) fn split_loads(self) -> Assembler
-    {
-        // Load operands that are GC values into a register
-        fn load_gc_opnds(op: Op, opnds: Vec<Opnd>, asm: &mut Assembler) -> Vec<Opnd>
-        {
-            if op == Op::Load || op == Op::Mov {
-                return opnds;
-            }
-
-            fn map_opnd(opnd: Opnd, asm: &mut Assembler) -> Opnd {
-                if let Opnd::Value(val) = opnd {
-                    // If this is a heap object, load it into a register
-                    if !val.special_const_p() {
-                        asm.load(opnd);
-                    }
-                }
-
-                opnd
-            }
-
-            opnds.into_iter().map(|opnd| map_opnd(opnd, asm)).collect()
-        }
-
-        self.forward_pass(|asm, _, op, opnds, target| {
-            // Load heap object operands into registers because most
-            // instructions can't directly work with 64-bit constants
-            let opnds = load_gc_opnds(op, opnds, asm);
-
-            match op {
-                // Check for Add, Sub, And, Mov, with two memory operands.
-                // Load one operand into memory.
-                Op::Add | Op::Sub | Op::And | Op::Mov => {
-                    match opnds.as_slice() {
-                        [Opnd::Mem(_), Opnd::Mem(_)] => {
-                            // We load opnd1 because for mov, opnd0 is the output
-                            let opnd1 = asm.load(opnds[1]);
-                            asm.push_insn(op, vec![opnds[0], opnd1], None);
-                        },
-
-                        [Opnd::Mem(_), Opnd::UImm(val)] => {
-                            if uimm_num_bits(*val) > 32 {
-                                let opnd1 = asm.load(opnds[1]);
-                                asm.push_insn(op, vec![opnds[0], opnd1], None);
-                            }
-                            else
-                            {
-                                asm.push_insn(op, opnds, target);
-                            }
-                        },
-
-                        _ => {
-                            asm.push_insn(op, opnds, target);
-                        }
-                    }
-                },
-                _ => {
-                    asm.push_insn(op, opnds, target);
-                }
-            };
-        })
-    }
-
     /// Sets the out field on the various instructions that require allocated
     /// registers because their output is used as the operand on a subsequent
     /// instruction. This is our implementation of the linear scan algorithm.
diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs
index d72f0ec0ac..27f799fc31 100644
--- a/yjit/src/backend/tests.rs
+++ b/yjit/src/backend/tests.rs
@@ -44,22 +44,6 @@ fn test_add() {
 }
 
 #[test]
-fn test_split_loads() {
-    let mut asm = Assembler::new();
-
-    let regs = Assembler::get_alloc_regs();
-
-    asm.add(
-        Opnd::mem(64, Opnd::Reg(regs[0]), 0),
-        Opnd::mem(64, Opnd::Reg(regs[1]), 0)
-    );
-
-    let result = asm.split_loads();
-    assert_eq!(result.insns.len(), 2);
-    assert_eq!(result.insns[0].op, Op::Load);
-}
-
-#[test]
 fn test_alloc_regs() {
     let mut asm = Assembler::new();
@@ -109,7 +93,8 @@ fn test_compile()
     let regs = Assembler::get_alloc_regs();
 
     let out = asm.add(Opnd::Reg(regs[0]), Opnd::UImm(2));
-    asm.add(out, Opnd::UImm(2));
+    let out2 = asm.add(out, Opnd::UImm(2));
+    asm.store(Opnd::mem(64, SP, 0), out2);
 
     asm.compile_with_num_regs(&mut cb, 1);
 }
@@ -162,7 +147,7 @@ fn test_reuse_reg()
     let v0 = asm.add(Opnd::mem(64, SP, 0), Opnd::UImm(1));
     let v1 = asm.add(Opnd::mem(64, SP, 8), Opnd::UImm(1));
 
-    let v2 = asm.add(v0, Opnd::UImm(1)); // Reuse v1 register
+    let v2 = asm.add(v1, Opnd::UImm(1)); // Reuse v1 register
     let v3 = asm.add(v0, v2);
 
     asm.store(Opnd::mem(64, SP, 0), v2);
@@ -202,7 +187,7 @@ fn test_base_insn_out()
     // Increment and store the updated value
     asm.incr_counter(counter_opnd, 1.into());
 
-    asm.compile_with_num_regs(&mut cb, 1);
+    asm.compile_with_num_regs(&mut cb, 2);
 }
 
 #[test]
@@ -262,7 +247,7 @@ fn test_jcc_ptr()
     );
     asm.jnz(side_exit);
 
-    asm.compile_with_num_regs(&mut cb, 1);
+    asm.compile_with_num_regs(&mut cb, 2);
 }
 
 /// Direct jump to a stub e.g. for deferred compilation
@@ -293,5 +278,5 @@ fn test_jo()
     asm.mov(Opnd::mem(64, SP, 0), out_val);
 
-    asm.compile_with_num_regs(&mut cb, 1);
+    asm.compile_with_num_regs(&mut cb, 2);
 }
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
index 7a26650549..4fd30e7144 100644
--- a/yjit/src/backend/x86_64/mod.rs
+++ b/yjit/src/backend/x86_64/mod.rs
@@ -2,7 +2,7 @@
 #![allow(unused_variables)]
 #![allow(unused_imports)]
 
-use crate::asm::{CodeBlock};
+use crate::asm::{uimm_num_bits, CodeBlock};
 use crate::asm::x86_64::*;
 use crate::codegen::{JITState};
 use crate::cruby::*;
@@ -82,36 +82,97 @@ impl Assembler
         let live_ranges: Vec<usize> = std::mem::take(&mut self.live_ranges);
 
         self.forward_pass(|asm, index, op, opnds, target| {
+            // Load heap object operands into registers because most
+            // instructions can't directly work with 64-bit constants
+            let opnds = match op {
+                Op::Load | Op::Mov => opnds,
+                _ => opnds.into_iter().map(|opnd| {
+                    if let Opnd::Value(value) = opnd {
+                        if !value.special_const_p() {
+                            asm.load(opnd)
+                        } else {
+                            opnd
+                        }
+                    } else {
+                        opnd
+                    }
+                }).collect()
+            };
+
             match op {
-                Op::Add | Op::Sub | Op::And | Op::Not => {
-                    match opnds[0] {
+                Op::Add | Op::Sub | Op::And => {
+                    let (opnd0, opnd1) = match (opnds[0], opnds[1]) {
+                        (Opnd::Mem(_), Opnd::Mem(_)) => {
+                            (asm.load(opnds[0]), asm.load(opnds[1]))
+                        },
+                        (Opnd::Mem(_), Opnd::UImm(value)) => {
+                            if uimm_num_bits(value) > 32 {
+                                (asm.load(opnds[0]), asm.load(opnds[1]))
+                            } else {
+                                (asm.load(opnds[0]), opnds[1])
+                            }
+                        },
                         // Instruction output whose live range spans beyond this instruction
-                        Opnd::InsnOut{idx, ..} => {
+                        (Opnd::InsnOut { idx, .. }, _) => {
                             if live_ranges[idx] > index {
-                                let opnd0 = asm.load(opnds[0]);
-                                let mut new_opnds = vec![opnd0];
-                                new_opnds.extend_from_slice(&opnds[1..]);
-                                asm.push_insn(op, new_opnds, None);
-                                return;
+                                (asm.load(opnds[0]), opnds[1])
+                            } else {
+                                (opnds[0], opnds[1])
                             }
                         },
-                        // We have to load memory and register operands to avoid corrupting them
-                        Opnd::Mem(_) | Opnd::Reg(_) => {
-                            let opnd0 = asm.load(opnds[0]);
-                            let mut new_opnds = vec![opnd0];
-                            new_opnds.extend_from_slice(&opnds[1..]);
-                            asm.push_insn(op, new_opnds, None);
-                            return;
+                        (Opnd::Mem(_) | Opnd::Reg(_), _) => {
+                            (asm.load(opnds[0]), opnds[1])
                         },
+                        _ => (opnds[0], opnds[1])
+                    };
 
-                        _ => {}
+                    asm.push_insn(op, vec![opnd0, opnd1], target);
+                },
+                Op::Mov => {
+                    match (opnds[0], opnds[1]) {
+                        (Opnd::Mem(_), Opnd::Mem(_)) => {
+                            // We load opnd1 because for mov, opnd0 is the output
+                            let opnd1 = asm.load(opnds[1]);
+                            asm.mov(opnds[0], opnd1);
+                        },
+                        (Opnd::Mem(_), Opnd::UImm(value)) => {
+                            if uimm_num_bits(value) > 32 {
+                                let opnd1 = asm.load(opnds[1]);
+                                asm.mov(opnds[0], opnd1);
+                            } else {
+                                asm.mov(opnds[0], opnds[1]);
+                            }
+                        },
+                        _ => {
+                            asm.mov(opnds[0], opnds[1]);
+                        }
+                    }
                 },
-                _ => {}
+                Op::Not => {
+                    let opnd0 = match opnds[0] {
+                        // If we have an instruction output whose live range
+                        // spans beyond this instruction, we have to load it.
+                        Opnd::InsnOut { idx, .. } => {
+                            if live_ranges[idx] > index {
+                                asm.load(opnds[0])
+                            } else {
+                                opnds[0]
+                            }
+                        },
+                        // We have to load memory and register operands to avoid
+                        // corrupting them.
+                        Opnd::Mem(_) | Opnd::Reg(_) => asm.load(opnds[0]),
+                        // Otherwise we can just reuse the existing operand.
+                        _ => opnds[0]
+                    };
+
+                    asm.not(opnd0);
+                },
+                _ => {
+                    asm.push_insn(op, opnds, target);
+                }
             };
-
-            asm.push_insn(op, opnds, target);
         })
     }
@@ -270,9 +331,7 @@ impl Assembler
     /// Optimize and compile the stored instructions
     pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>) -> Vec<u32>
     {
-        let mut asm = self.x86_split();
-        let mut asm = asm.split_loads();
-        let mut asm = asm.alloc_regs(regs);
+        let mut asm = self.x86_split().alloc_regs(regs);
 
         // Create label instances in the code block
         for (idx, name) in asm.label_names.iter().enumerate() {