author    Kevin Newton <kddnewton@gmail.com>       2022-07-08 13:01:21 -0400
committer Takashi Kokubun <takashikkbn@gmail.com>  2022-08-29 08:46:59 -0700
commit    6773832ab9cad3c7bcb3b93ef85a4bcfc9b3a4e3 (patch)
tree      b377f91fb5597d0ee141485205a2638cb35f75c8
parent    0551115912fd6682187dd501275096fdb7570084 (diff)
More Arm64 lowering/backend work (https://github.com/Shopify/ruby/pull/307)
* More Arm64 lowering/backend work
* We now have encoding support for the LDR instruction for loading a PC-relative memory location.
* You can now call add/adds/sub/subs with signed immediates, which switch between the add and sub encodings based on the sign.
* We can now load immediates into registers appropriately, attempting to keep the number of instructions minimal:
  * If the value fits into 16 bits, we use just a single movz.
  * Else if it can be encoded as a bitmask immediate, we use a single mov.
  * Otherwise we use a movz, a movk, and then optionally another one or two movks.
* Fixed a bunch of code to do with the Op::Load opcode.
* We now handle GC offsets properly for Op::Load by skipping around them with a jump instruction. (This will be made better by constant pools in the future.)
* Op::Lea is doing what it's supposed to do now.
* Fixed a bug in the backend tests to do with not using the result of an Op::Add.
* Fix the remaining tests for Arm64.
* Move the split-loads logic into each backend.
 yjit/src/asm/arm64/inst/load_literal.rs |  89
 yjit/src/asm/arm64/inst/mod.rs          |   2
 yjit/src/asm/arm64/mod.rs               | 131
 yjit/src/asm/arm64/opnd.rs              |   1
 yjit/src/asm/mod.rs                     |   2
 yjit/src/backend/arm64/mod.rs           | 172
 yjit/src/backend/ir.rs                  |  65
 yjit/src/backend/tests.rs               |  27
 yjit/src/backend/x86_64/mod.rs          | 105
 9 files changed, 431 insertions(+), 163 deletions(-)
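
As a rough sketch of the immediate-loading strategy described in the commit message above (the function name load_immediate_sketch is illustrative only; CodeBlock, A64Opnd, movz, movk, mov and BitmaskImmediate are the helpers that appear in this diff, and the real logic is emit_load_value in yjit/src/backend/arm64/mod.rs further down):

    // Sketch only: materialize `value` in `rd` with as few instructions as possible.
    fn load_immediate_sketch(cb: &mut CodeBlock, rd: A64Opnd, value: u64) {
        if value <= 0xffff {
            // Fits in 16 bits: a single movz is enough.
            movz(cb, rd, A64Opnd::new_uimm(value), 0);
        } else if BitmaskImmediate::try_from(value).is_ok() {
            // Encodable as a bitmask immediate: a single mov.
            mov(cb, rd, A64Opnd::new_uimm(value));
        } else {
            // Fall back to movz for the low 16 bits, then movk for each
            // remaining 16-bit chunk while any higher bits are still set.
            movz(cb, rd, A64Opnd::new_uimm(value & 0xffff), 0);
            let mut remaining = value >> 16;
            let mut shift = 16;
            while remaining > 0 {
                movk(cb, rd, A64Opnd::new_uimm(remaining & 0xffff), shift);
                remaining >>= 16;
                shift += 16;
            }
        }
    }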
diff --git a/yjit/src/asm/arm64/inst/load_literal.rs b/yjit/src/asm/arm64/inst/load_literal.rs
new file mode 100644
index 0000000000..a49130c3eb
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/load_literal.rs
@@ -0,0 +1,89 @@
+/// The size of the operands being operated on.
+enum Opc {
+ Size32 = 0b00,
+ Size64 = 0b01,
+}
+
+/// A convenience function so that we can convert the number of bits of a
+/// register operand directly into an Opc enum variant.
+impl From<u8> for Opc {
+ fn from(num_bits: u8) -> Self {
+ match num_bits {
+ 64 => Opc::Size64,
+ 32 => Opc::Size32,
+ _ => panic!("Invalid number of bits: {}", num_bits)
+ }
+ }
+}
+
+/// The struct that represents an A64 load literal instruction that can be encoded.
+///
+/// LDR
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// |        0  1    1  0  0  0                                                                                     |
+/// | opc..                          imm19........................................................... rt.......... |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct LoadLiteral {
+ /// The number of the register to load the value into.
+ rt: u8,
+
+ /// The PC-relative number of instructions to load the value from.
+ imm19: i32,
+
+ /// The size of the operands being operated on.
+ opc: Opc
+}
+
+impl LoadLiteral {
+ /// LDR (load literal)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--literal---Load-Register--literal--?lang=en
+ pub fn ldr(rt: u8, imm19: i32, num_bits: u8) -> Self {
+ Self { rt, imm19, opc: num_bits.into() }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
+const FAMILY: u32 = 0b0100;
+
+impl From<LoadLiteral> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: LoadLiteral) -> Self {
+ let imm19 = (inst.imm19 as u32) & ((1 << 19) - 1);
+
+ 0
+ | ((inst.opc as u32) << 30)
+ | (1 << 28)
+ | (FAMILY << 25)
+ | (imm19 << 5)
+ | (inst.rt as u32)
+ }
+}
+
+impl From<LoadLiteral> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: LoadLiteral) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_ldr_positive() {
+ let inst = LoadLiteral::ldr(0, 5, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0x580000a0, result);
+ }
+
+ #[test]
+ fn test_ldr_negative() {
+ let inst = LoadLiteral::ldr(0, -5, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0x58ffff60, result);
+ }
+}
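
The expected words in the two tests above can be reproduced by hand from the layout in the doc comment; this is the same bit arithmetic as the From<LoadLiteral> impl, written out as a standalone check (the names here are illustrative, not part of the patch):

    // LDR (literal), 64-bit variant: opc = 0b01 in bits 31..30, fixed bits 0b011000
    // in bits 29..24, imm19 = PC-relative word offset (two's complement), rt = register.
    fn encode_ldr_literal(rt: u8, imm19: i32) -> u32 {
        let imm19 = (imm19 as u32) & ((1 << 19) - 1);
        (0b01 << 30) | (0b011 << 27) | (imm19 << 5) | (rt as u32)
    }

    fn main() {
        assert_eq!(encode_ldr_literal(0, 5), 0x580000a0);  // test_ldr_positive
        assert_eq!(encode_ldr_literal(0, -5), 0x58ffff60); // test_ldr_negative
    }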
diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs
index ae589ca564..f402f6765a 100644
--- a/yjit/src/asm/arm64/inst/mod.rs
+++ b/yjit/src/asm/arm64/inst/mod.rs
@@ -9,6 +9,7 @@ mod call;
mod data_imm;
mod data_reg;
mod load;
+mod load_literal;
mod logical_imm;
mod logical_reg;
mod mov;
@@ -24,6 +25,7 @@ pub use call::Call;
pub use data_imm::DataImm;
pub use data_reg::DataReg;
pub use load::Load;
+pub use load_literal::LoadLiteral;
pub use logical_imm::LogicalImm;
pub use logical_reg::LogicalReg;
pub use mov::Mov;
diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs
index ced8b262c5..2dc5aa9388 100644
--- a/yjit/src/asm/arm64/mod.rs
+++ b/yjit/src/asm/arm64/mod.rs
@@ -39,11 +39,21 @@ pub fn add(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
DataReg::add(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into()
},
- (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => {
assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
- assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less.");
+ assert!(uimm_fits_bits(uimm12, 12), "The immediate operand must be 12 bits or less.");
- DataImm::add(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into()
+ DataImm::add(rd.reg_no, rn.reg_no, uimm12 as u16, rd.num_bits).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => {
+ assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+ assert!(imm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less.");
+
+ if imm12 < 0 {
+ DataImm::sub(rd.reg_no, rn.reg_no, -imm12 as u16, rd.num_bits).into()
+ } else {
+ DataImm::add(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into()
+ }
},
_ => panic!("Invalid operand combination to add instruction."),
};
@@ -68,6 +78,16 @@ pub fn adds(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
DataImm::adds(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into()
},
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => {
+ assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+ assert!(imm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less.");
+
+ if imm12 < 0 {
+ DataImm::subs(rd.reg_no, rn.reg_no, -imm12 as u16, rd.num_bits).into()
+ } else {
+ DataImm::adds(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into()
+ }
+ },
_ => panic!("Invalid operand combination to adds instruction."),
};
@@ -237,6 +257,18 @@ pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) {
cb.write_bytes(&bytes);
}
+/// LDR (literal) - load the value at a PC-relative memory address into a register
+pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) {
+ let bytes: [u8; 4] = match rt {
+ A64Opnd::Reg(rt) => {
+ LoadLiteral::ldr(rt.reg_no, rn, rt.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to ldr instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
/// LDUR - load a memory address into a register
pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
let bytes: [u8; 4] = match (rt, rn) {
@@ -415,11 +447,21 @@ pub fn sub(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
DataReg::sub(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into()
},
- (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => {
assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
- assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less.");
+ assert!(uimm_fits_bits(uimm12, 12), "The immediate operand must be 12 bits or less.");
- DataImm::sub(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into()
+ DataImm::sub(rd.reg_no, rn.reg_no, uimm12 as u16, rd.num_bits).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => {
+ assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+ assert!(imm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less.");
+
+ if imm12 < 0 {
+ DataImm::add(rd.reg_no, rn.reg_no, -imm12 as u16, rd.num_bits).into()
+ } else {
+ DataImm::sub(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into()
+ }
},
_ => panic!("Invalid operand combination to sub instruction."),
};
@@ -438,11 +480,21 @@ pub fn subs(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
DataReg::subs(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into()
},
- (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => {
assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
- assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less.");
+ assert!(uimm_fits_bits(uimm12, 12), "The immediate operand must be 12 bits or less.");
+
+ DataImm::subs(rd.reg_no, rn.reg_no, uimm12 as u16, rd.num_bits).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => {
+ assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+ assert!(imm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less.");
- DataImm::subs(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into()
+ if imm12 < 0 {
+ DataImm::adds(rd.reg_no, rn.reg_no, -imm12 as u16, rd.num_bits).into()
+ } else {
+ DataImm::subs(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into()
+ }
},
_ => panic!("Invalid operand combination to subs instruction."),
};
@@ -513,26 +565,46 @@ mod tests {
}
#[test]
- fn test_add_register() {
+ fn test_add_reg() {
check_bytes("2000028b", |cb| add(cb, X0, X1, X2));
}
#[test]
- fn test_add_immediate() {
+ fn test_add_uimm() {
check_bytes("201c0091", |cb| add(cb, X0, X1, A64Opnd::new_uimm(7)));
}
#[test]
- fn test_adds_register() {
+ fn test_add_imm_positive() {
+ check_bytes("201c0091", |cb| add(cb, X0, X1, A64Opnd::new_imm(7)));
+ }
+
+ #[test]
+ fn test_add_imm_negative() {
+ check_bytes("201c00d1", |cb| add(cb, X0, X1, A64Opnd::new_imm(-7)));
+ }
+
+ #[test]
+ fn test_adds_reg() {
check_bytes("200002ab", |cb| adds(cb, X0, X1, X2));
}
#[test]
- fn test_adds_immediate() {
+ fn test_adds_uimm() {
check_bytes("201c00b1", |cb| adds(cb, X0, X1, A64Opnd::new_uimm(7)));
}
#[test]
+ fn test_adds_imm_positive() {
+ check_bytes("201c00b1", |cb| adds(cb, X0, X1, A64Opnd::new_imm(7)));
+ }
+
+ #[test]
+ fn test_adds_imm_negative() {
+ check_bytes("201c00f1", |cb| adds(cb, X0, X1, A64Opnd::new_imm(-7)));
+ }
+
+ #[test]
fn test_and_register() {
check_bytes("2000028a", |cb| and(cb, X0, X1, X2));
}
@@ -598,6 +670,11 @@ mod tests {
}
#[test]
+ fn test_ldr() {
+ check_bytes("40010058", |cb| ldr(cb, X0, 10));
+ }
+
+ #[test]
fn test_ldur_memory() {
check_bytes("20b047f8", |cb| ldur(cb, X0, A64Opnd::new_mem(64, X1, 123)));
}
@@ -678,22 +755,42 @@ mod tests {
}
#[test]
- fn test_sub_register() {
+ fn test_sub_reg() {
check_bytes("200002cb", |cb| sub(cb, X0, X1, X2));
}
#[test]
- fn test_sub_immediate() {
+ fn test_sub_uimm() {
check_bytes("201c00d1", |cb| sub(cb, X0, X1, A64Opnd::new_uimm(7)));
}
#[test]
- fn test_subs_register() {
+ fn test_sub_imm_positive() {
+ check_bytes("201c00d1", |cb| sub(cb, X0, X1, A64Opnd::new_imm(7)));
+ }
+
+ #[test]
+ fn test_sub_imm_negative() {
+ check_bytes("201c0091", |cb| sub(cb, X0, X1, A64Opnd::new_imm(-7)));
+ }
+
+ #[test]
+ fn test_subs_reg() {
check_bytes("200002eb", |cb| subs(cb, X0, X1, X2));
}
#[test]
- fn test_subs_immediate() {
+ fn test_subs_imm_positive() {
+ check_bytes("201c00f1", |cb| subs(cb, X0, X1, A64Opnd::new_imm(7)));
+ }
+
+ #[test]
+ fn test_subs_imm_negative() {
+ check_bytes("201c00b1", |cb| subs(cb, X0, X1, A64Opnd::new_imm(-7)));
+ }
+
+ #[test]
+ fn test_subs_uimm() {
check_bytes("201c00f1", |cb| subs(cb, X0, X1, A64Opnd::new_uimm(7)));
}
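
To make the signed-immediate handling above concrete, here is a usage illustration only (it assumes a `cb: &mut CodeBlock` and the register constants from this module are in scope):

    // Both calls emit the same bytes ("201c0091" in the tests above): add x0, x1, #7.
    add(cb, X0, X1, A64Opnd::new_uimm(7));
    add(cb, X0, X1, A64Opnd::new_imm(7));

    // A negative signed immediate flips the opcode, so this add emits the same
    // bytes as sub(cb, X0, X1, A64Opnd::new_uimm(7)) -- "201c00d1" in the tests.
    add(cb, X0, X1, A64Opnd::new_imm(-7));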
diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs
index 6c06d2db3c..1738f0985c 100644
--- a/yjit/src/asm/arm64/opnd.rs
+++ b/yjit/src/asm/arm64/opnd.rs
@@ -175,3 +175,4 @@ pub const W31: A64Reg = A64Reg { num_bits: 32, reg_no: 31 };
// C argument registers
pub const C_ARG_REGS: [A64Opnd; 4] = [X0, X1, X2, X3];
+pub const C_ARG_REGREGS: [A64Reg; 4] = [X0_REG, X1_REG, X2_REG, X3_REG];
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
index 5723406aec..126c9a8548 100644
--- a/yjit/src/asm/mod.rs
+++ b/yjit/src/asm/mod.rs
@@ -174,7 +174,7 @@ impl CodeBlock {
}
/// Write multiple bytes starting from the current position.
- fn write_bytes(&mut self, bytes: &[u8]) {
+ pub fn write_bytes(&mut self, bytes: &[u8]) {
for byte in bytes {
self.write_byte(*byte);
}
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index 061d21d19b..7e6a187f8f 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -61,10 +61,7 @@ impl Assembler
/// Get the list of registers from which we can allocate on this platform
pub fn get_alloc_regs() -> Vec<Reg>
{
- vec![
- X12_REG,
- X13_REG
- ]
+ vec![C_RET_REG, X12_REG]
}
/// Split platform-specific instructions
@@ -75,8 +72,21 @@ impl Assembler
fn arm64_split(mut self) -> Assembler
{
self.forward_pass(|asm, index, op, opnds, target| {
+ // Load into registers all Value operands that aren't already part
+ // of Load instructions.
+ let opnds = match op {
+ Op::Load => opnds,
+ _ => opnds.into_iter().map(|opnd| {
+ if let Opnd::Value(_) = opnd {
+ asm.load(opnd)
+ } else {
+ opnd
+ }
+ }).collect()
+ };
+
match op {
- Op::Add | Op::Sub => {
+ Op::Add | Op::And | Op::Sub => {
// Check if one of the operands is a register. If it is,
// then we'll make that the first operand.
match (opnds[0], opnds[1]) {
@@ -95,6 +105,28 @@ impl Assembler
}
}
},
+ Op::CCall => {
+ assert!(opnds.len() < C_ARG_REGS.len());
+
+ // For each of the operands we're going to first load them
+ // into a register and then move them into the correct
+ // argument register.
+ for (idx, opnd) in opnds.into_iter().enumerate() {
+ let value = asm.load(opnd);
+ asm.mov(Opnd::Reg(C_ARG_REGREGS[idx]), value);
+ }
+
+ // Now we push the CCall without any arguments so that it
+ // just performs the call.
+ asm.ccall(target.unwrap().unwrap_fun_ptr(), vec![]);
+ },
+ Op::CRet => {
+ if opnds[0] != Opnd::Reg(C_RET_REG) {
+ let value = asm.load(opnds[0]);
+ asm.mov(C_RET_OPND, value);
+ }
+ asm.cret(C_RET_OPND);
+ },
Op::IncrCounter => {
// Every operand to the IncrCounter instruction needs to be a
// register once it gets there. So here we're going to load
@@ -154,6 +186,16 @@ impl Assembler
asm.store(opnds[0], opnd1);
},
+ Op::Test => {
+ // The value being tested must be in a register, so if it's
+ // not already one we'll load it first.
+ let opnd0 = match opnds[0] {
+ Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[0],
+ _ => asm.load(opnds[0])
+ };
+
+ asm.test(opnd0, opnds[1]);
+ },
_ => {
asm.push_insn(op, opnds, target);
}
@@ -165,6 +207,45 @@ impl Assembler
/// Returns a list of GC offsets
pub fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Vec<u32>
{
+ /// Emit the required instructions to load the given value into the
+ /// given register. Our goal here is to use as few instructions as
+ /// possible to get this value into the register.
+ fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) {
+ let mut current = value;
+
+ if current <= 0xffff {
+ // If the value fits into a single movz
+ // instruction, then we'll use that.
+ movz(cb, rd, A64Opnd::new_uimm(current), 0);
+ } else if BitmaskImmediate::try_from(current).is_ok() {
+ // Otherwise, if the immediate can be encoded
+ // with the special bitmask immediate encoding,
+ // we'll use that.
+ mov(cb, rd, A64Opnd::new_uimm(current));
+ } else {
+ // Finally we'll fall back to encoding the value
+ // using movz for the first 16 bits and movk for
+ // each subsequent set of 16 bits, as long as
+ // they are necessary.
+ movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0);
+
+ // (We're sure this is necessary since we
+ // checked if it only fit into movz above).
+ current >>= 16;
+ movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16);
+
+ if current > 0xffff {
+ current >>= 16;
+ movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32);
+ }
+
+ if current > 0xffff {
+ current >>= 16;
+ movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48);
+ }
+ }
+ }
+
/// Emit a conditional jump instruction to a specific target. This is
/// called when lowering any of the conditional jump instructions.
fn emit_conditional_jump(cb: &mut CodeBlock, condition: Condition, target: Target) {
@@ -203,7 +284,7 @@ impl Assembler
// wasn't met, in which case we'll jump past the
// next instruction that performs the direct jump.
b(cb, A64Opnd::new_imm(8));
- mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64));
+ emit_load_value(cb, X29, dst_addr as u64);
br(cb, X29);
}
}
@@ -257,25 +338,57 @@ impl Assembler
stur(cb, insn.opnds[1].into(), insn.opnds[0].into());
},
Op::Load => {
- mov(cb, insn.out.into(), insn.opnds[0].into());
-
- // This assumes only load instructions can contain
- // references to GC'd Value operands. If the value being
- // loaded is a heap object, we'll report that back out to
- // the gc_offsets list.
- if let Opnd::Value(val) = insn.opnds[0] {
- if !val.special_const_p() {
- // The pointer immediate is encoded as the last part of the mov written out
- let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
- gc_offsets.push(ptr_offset);
+ match insn.opnds[0] {
+ Opnd::Reg(_) | Opnd::InsnOut { .. } => {
+ mov(cb, insn.out.into(), insn.opnds[0].into());
+ },
+ Opnd::UImm(uimm) => {
+ emit_load_value(cb, insn.out.into(), uimm);
+ },
+ Opnd::Imm(imm) => {
+ emit_load_value(cb, insn.out.into(), imm as u64);
+ },
+ Opnd::Mem(_) => {
+ ldur(cb, insn.out.into(), insn.opnds[0].into());
+ },
+ Opnd::Value(value) => {
+ // This assumes only load instructions can contain
+ // references to GC'd Value operands. If the value
+ // being loaded is a heap object, we'll report that
+ // back out to the gc_offsets list.
+ ldr(cb, insn.out.into(), 1);
+ b(cb, A64Opnd::new_uimm((SIZEOF_VALUE as u64) / 4));
+ cb.write_bytes(&value.as_u64().to_le_bytes());
+
+ if !value.special_const_p() {
+ let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
+ gc_offsets.push(ptr_offset);
+ }
+ },
+ Opnd::None => {
+ unreachable!("Attempted to load from None operand");
}
- }
+ };
},
Op::Mov => {
mov(cb, insn.opnds[0].into(), insn.opnds[1].into());
},
Op::Lea => {
- ldur(cb, insn.out.into(), insn.opnds[0].into());
+ let opnd: A64Opnd = insn.opnds[0].into();
+
+ match opnd {
+ A64Opnd::Mem(mem) => {
+ add(
+ cb,
+ insn.out.into(),
+ A64Opnd::Reg(A64Reg { reg_no: mem.base_reg_no, num_bits: 64 }),
+ A64Opnd::new_imm(mem.disp.into())
+ );
+ },
+ _ => {
+ panic!("Op::Lea only accepts Opnd::Mem operands.");
+ }
+ };
},
Op::CPush => {
add(cb, C_SP_REG, C_SP_REG, C_SP_STEP);
@@ -286,14 +399,6 @@ impl Assembler
sub(cb, C_SP_REG, C_SP_REG, C_SP_STEP);
},
Op::CCall => {
- // Temporary
- assert!(insn.opnds.len() < C_ARG_REGS.len());
-
- // For each operand
- for (idx, opnd) in insn.opnds.iter().enumerate() {
- mov(cb, C_ARG_REGS[idx], insn.opnds[idx].into());
- }
-
let src_addr = cb.get_write_ptr().into_i64() + 4;
let dst_addr = insn.target.unwrap().unwrap_fun_ptr() as i64;
@@ -310,17 +415,12 @@ impl Assembler
if b_offset_fits_bits(offset) {
bl(cb, A64Opnd::new_imm(offset / 4));
} else {
- mov(cb, X30, A64Opnd::new_uimm(src_addr as u64));
- mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64));
+ emit_load_value(cb, X30, src_addr as u64);
+ emit_load_value(cb, X29, dst_addr as u64);
br(cb, X29);
}
},
Op::CRet => {
- // TODO: bias allocation towards return register
- if insn.opnds[0] != Opnd::Reg(C_RET_REG) {
- mov(cb, C_RET_OPND.into(), insn.opnds[0].into());
- }
-
ret(cb, A64Opnd::None);
},
Op::Cmp => {
@@ -351,7 +451,7 @@ impl Assembler
if b_offset_fits_bits(offset) {
b(cb, A64Opnd::new_imm(offset / 4));
} else {
- mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64));
+ emit_load_value(cb, X29, dst_addr as u64);
br(cb, X29);
}
},
@@ -398,7 +498,7 @@ impl Assembler
/// Optimize and compile the stored instructions
pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>) -> Vec<u32>
{
- let mut asm = self.arm64_split().split_loads().alloc_regs(regs);
+ let mut asm = self.arm64_split().alloc_regs(regs);
// Create label instances in the code block
for (idx, name) in asm.label_names.iter().enumerate() {
diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs
index c9e75df01a..cd88ec560b 100644
--- a/yjit/src/backend/ir.rs
+++ b/yjit/src/backend/ir.rs
@@ -534,71 +534,6 @@ impl Assembler
asm
}
- /// Transforms the instructions by splitting instructions that cannot be
- /// represented in the final architecture into multiple instructions that
- /// can.
- pub(super) fn split_loads(self) -> Assembler
- {
- // Load operands that are GC values into a register
- fn load_gc_opnds(op: Op, opnds: Vec<Opnd>, asm: &mut Assembler) -> Vec<Opnd>
- {
- if op == Op::Load || op == Op::Mov {
- return opnds;
- }
-
- fn map_opnd(opnd: Opnd, asm: &mut Assembler) -> Opnd {
- if let Opnd::Value(val) = opnd {
- // If this is a heap object, load it into a register
- if !val.special_const_p() {
- asm.load(opnd);
- }
- }
-
- opnd
- }
-
- opnds.into_iter().map(|opnd| map_opnd(opnd, asm)).collect()
- }
-
- self.forward_pass(|asm, _, op, opnds, target| {
- // Load heap object operands into registers because most
- // instructions can't directly work with 64-bit constants
- let opnds = load_gc_opnds(op, opnds, asm);
-
- match op {
- // Check for Add, Sub, And, Mov, with two memory operands.
- // Load one operand into memory.
- Op::Add | Op::Sub | Op::And | Op::Mov => {
- match opnds.as_slice() {
- [Opnd::Mem(_), Opnd::Mem(_)] => {
- // We load opnd1 because for mov, opnd0 is the output
- let opnd1 = asm.load(opnds[1]);
- asm.push_insn(op, vec![opnds[0], opnd1], None);
- },
-
- [Opnd::Mem(_), Opnd::UImm(val)] => {
- if uimm_num_bits(*val) > 32 {
- let opnd1 = asm.load(opnds[1]);
- asm.push_insn(op, vec![opnds[0], opnd1], None);
- }
- else
- {
- asm.push_insn(op, opnds, target);
- }
- },
-
- _ => {
- asm.push_insn(op, opnds, target);
- }
- }
- },
- _ => {
- asm.push_insn(op, opnds, target);
- }
- };
- })
- }
-
/// Sets the out field on the various instructions that require allocated
/// registers because their output is used as the operand on a subsequent
/// instruction. This is our implementation of the linear scan algorithm.
diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs
index d72f0ec0ac..27f799fc31 100644
--- a/yjit/src/backend/tests.rs
+++ b/yjit/src/backend/tests.rs
@@ -44,22 +44,6 @@ fn test_add() {
}
#[test]
-fn test_split_loads() {
- let mut asm = Assembler::new();
-
- let regs = Assembler::get_alloc_regs();
-
- asm.add(
- Opnd::mem(64, Opnd::Reg(regs[0]), 0),
- Opnd::mem(64, Opnd::Reg(regs[1]), 0)
- );
-
- let result = asm.split_loads();
- assert_eq!(result.insns.len(), 2);
- assert_eq!(result.insns[0].op, Op::Load);
-}
-
-#[test]
fn test_alloc_regs() {
let mut asm = Assembler::new();
@@ -109,7 +93,8 @@ fn test_compile()
let regs = Assembler::get_alloc_regs();
let out = asm.add(Opnd::Reg(regs[0]), Opnd::UImm(2));
- asm.add(out, Opnd::UImm(2));
+ let out2 = asm.add(out, Opnd::UImm(2));
+ asm.store(Opnd::mem(64, SP, 0), out2);
asm.compile_with_num_regs(&mut cb, 1);
}
@@ -162,7 +147,7 @@ fn test_reuse_reg()
let v0 = asm.add(Opnd::mem(64, SP, 0), Opnd::UImm(1));
let v1 = asm.add(Opnd::mem(64, SP, 8), Opnd::UImm(1));
- let v2 = asm.add(v0, Opnd::UImm(1)); // Reuse v1 register
+ let v2 = asm.add(v1, Opnd::UImm(1)); // Reuse v1 register
let v3 = asm.add(v0, v2);
asm.store(Opnd::mem(64, SP, 0), v2);
@@ -202,7 +187,7 @@ fn test_base_insn_out()
// Increment and store the updated value
asm.incr_counter(counter_opnd, 1.into());
- asm.compile_with_num_regs(&mut cb, 1);
+ asm.compile_with_num_regs(&mut cb, 2);
}
#[test]
@@ -262,7 +247,7 @@ fn test_jcc_ptr()
);
asm.jnz(side_exit);
- asm.compile_with_num_regs(&mut cb, 1);
+ asm.compile_with_num_regs(&mut cb, 2);
}
/// Direct jump to a stub e.g. for deferred compilation
@@ -293,5 +278,5 @@ fn test_jo()
asm.mov(Opnd::mem(64, SP, 0), out_val);
- asm.compile_with_num_regs(&mut cb, 1);
+ asm.compile_with_num_regs(&mut cb, 2);
}
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
index 7a26650549..4fd30e7144 100644
--- a/yjit/src/backend/x86_64/mod.rs
+++ b/yjit/src/backend/x86_64/mod.rs
@@ -2,7 +2,7 @@
#![allow(unused_variables)]
#![allow(unused_imports)]
-use crate::asm::{CodeBlock};
+use crate::asm::{uimm_num_bits, CodeBlock};
use crate::asm::x86_64::*;
use crate::codegen::{JITState};
use crate::cruby::*;
@@ -82,36 +82,97 @@ impl Assembler
let live_ranges: Vec<usize> = std::mem::take(&mut self.live_ranges);
self.forward_pass(|asm, index, op, opnds, target| {
+ // Load heap object operands into registers because most
+ // instructions can't directly work with 64-bit constants
+ let opnds = match op {
+ Op::Load | Op::Mov => opnds,
+ _ => opnds.into_iter().map(|opnd| {
+ if let Opnd::Value(value) = opnd {
+ if !value.special_const_p() {
+ asm.load(opnd)
+ } else {
+ opnd
+ }
+ } else {
+ opnd
+ }
+ }).collect()
+ };
+
match op {
- Op::Add | Op::Sub | Op::And | Op::Not => {
- match opnds[0] {
+ Op::Add | Op::Sub | Op::And => {
+ let (opnd0, opnd1) = match (opnds[0], opnds[1]) {
+ (Opnd::Mem(_), Opnd::Mem(_)) => {
+ (asm.load(opnds[0]), asm.load(opnds[1]))
+ },
+ (Opnd::Mem(_), Opnd::UImm(value)) => {
+ if uimm_num_bits(value) > 32 {
+ (asm.load(opnds[0]), asm.load(opnds[1]))
+ } else {
+ (asm.load(opnds[0]), opnds[1])
+ }
+ },
// Instruction output whose live range spans beyond this instruction
- Opnd::InsnOut{idx, ..} => {
+ (Opnd::InsnOut { idx, .. }, _) => {
if live_ranges[idx] > index {
- let opnd0 = asm.load(opnds[0]);
- let mut new_opnds = vec![opnd0];
- new_opnds.extend_from_slice(&opnds[1..]);
- asm.push_insn(op, new_opnds, None);
- return;
+ (asm.load(opnds[0]), opnds[1])
+ } else {
+ (opnds[0], opnds[1])
}
},
-
// We have to load memory and register operands to avoid corrupting them
- Opnd::Mem(_) | Opnd::Reg(_) => {
- let opnd0 = asm.load(opnds[0]);
- let mut new_opnds = vec![opnd0];
- new_opnds.extend_from_slice(&opnds[1..]);
- asm.push_insn(op, new_opnds, None);
- return;
+ (Opnd::Mem(_) | Opnd::Reg(_), _) => {
+ (asm.load(opnds[0]), opnds[1])
},
+ _ => (opnds[0], opnds[1])
+ };
- _ => {}
+ asm.push_insn(op, vec![opnd0, opnd1], target);
+ },
+ Op::Mov => {
+ match (opnds[0], opnds[1]) {
+ (Opnd::Mem(_), Opnd::Mem(_)) => {
+ // We load opnd1 because for mov, opnd0 is the output
+ let opnd1 = asm.load(opnds[1]);
+ asm.mov(opnds[0], opnd1);
+ },
+ (Opnd::Mem(_), Opnd::UImm(value)) => {
+ if uimm_num_bits(value) > 32 {
+ let opnd1 = asm.load(opnds[1]);
+ asm.mov(opnds[0], opnd1);
+ } else {
+ asm.mov(opnds[0], opnds[1]);
+ }
+ },
+ _ => {
+ asm.mov(opnds[0], opnds[1]);
+ }
}
},
- _ => {}
+ Op::Not => {
+ let opnd0 = match opnds[0] {
+ // If we have an instruction output whose live range
+ // spans beyond this instruction, we have to load it.
+ Opnd::InsnOut { idx, .. } => {
+ if live_ranges[idx] > index {
+ asm.load(opnds[0])
+ } else {
+ opnds[0]
+ }
+ },
+ // We have to load memory and register operands to avoid
+ // corrupting them.
+ Opnd::Mem(_) | Opnd::Reg(_) => asm.load(opnds[0]),
+ // Otherwise we can just reuse the existing operand.
+ _ => opnds[0]
+ };
+
+ asm.not(opnd0);
+ },
+ _ => {
+ asm.push_insn(op, opnds, target);
+ }
};
-
- asm.push_insn(op, opnds, target);
})
}
@@ -270,9 +331,7 @@ impl Assembler
/// Optimize and compile the stored instructions
pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>) -> Vec<u32>
{
- let mut asm = self.x86_split();
- let mut asm = asm.split_loads();
- let mut asm = asm.alloc_regs(regs);
+ let mut asm = self.x86_split().alloc_regs(regs);
// Create label instances in the code block
for (idx, name) in asm.label_names.iter().enumerate() {