diff options
| author | Kevin Newton <kddnewton@gmail.com> | 2022-07-05 16:04:19 -0400 |
|---|---|---|
| committer | Takashi Kokubun <takashikkbn@gmail.com> | 2022-08-29 08:46:58 -0700 |
| commit | 7a9b581e0896d4aa7a037da90c837b830213c8e8 (patch) | |
| tree | 8d613c9cca2af21aa17840270b23acb233b9f3ff /yjit/src/backend | |
| parent | b272c57f27628ab114206c777d5b274713d31079 (diff) | |
Arm64 progress (https://github.com/Shopify/ruby/pull/304)
* Get initial wiring up
* Split IncrCounter instruction
* Breakpoints in Arm64
* Support for ORR
* MOV instruction encodings
* Implement JmpOpnd and CRet
* Add ORN
* Add MVN
* PUSH, POP, CCALL for Arm64
* Some formatting and implement Op::Not for Arm64
* Consistent constants when working with the Arm64 SP
* Allow OR-ing values into the memory buffer
* Test lowering Arm64 ADD
* Emit unconditional jumps consistently in Arm64
* Begin emitting conditional jumps for A64
* Back out some labelref changes
* Remove label API that no longer exists
* Use a trait for the label encoders
* Encode nop
* Add in nops so jumps are the same width no matter what on Arm64
* Op::Jbe for CodePtr
* Pass src_addr and dst_addr instead of calculated offset to label refs
* Even more jump work for Arm64
* Fix up jumps to use consistent assertions
* Handle splitting Add, Sub, and Not insns for Arm64
* More Arm64 splits and various fixes
* PR feedback for Arm64 support
* Split up jumps and conditional jump logic
Diffstat (limited to 'yjit/src/backend')
| -rw-r--r-- | yjit/src/backend/arm64/mod.rs | 410 |
| -rw-r--r-- | yjit/src/backend/ir.rs | 7 |
| -rw-r--r-- | yjit/src/backend/mod.rs | 7 |
| -rw-r--r-- | yjit/src/backend/x86_64/mod.rs | 3 |
4 files changed, 398 insertions, 29 deletions
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 4e4c553c9d..061d21d19b 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -7,26 +7,51 @@ use crate::asm::arm64::*; use crate::codegen::{JITState}; use crate::cruby::*; use crate::backend::ir::*; +use crate::virtualmem::CodePtr; // Use the arm64 register type for this platform pub type Reg = A64Reg; // Callee-saved registers -pub const _CFP: Opnd = Opnd::Reg(X9); -pub const _EC: Opnd = Opnd::Reg(X10); -pub const _SP: Opnd = Opnd::Reg(X11); +pub const _CFP: Opnd = Opnd::Reg(X24_REG); +pub const _EC: Opnd = Opnd::Reg(X25_REG); +pub const _SP: Opnd = Opnd::Reg(X26_REG); + +// C argument registers on this platform +pub const _C_ARG_OPNDS: [Opnd; 6] = [ + Opnd::Reg(X0_REG), + Opnd::Reg(X1_REG), + Opnd::Reg(X2_REG), + Opnd::Reg(X3_REG), + Opnd::Reg(X4_REG), + Opnd::Reg(X5_REG) +]; // C return value register on this platform -pub const RET_REG: Reg = X0; +pub const C_RET_REG: Reg = X0_REG; +pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG); + +// These constants define the way we work with Arm64's stack pointer. The stack +// pointer always needs to be aligned to a 16-byte boundary. +pub const C_SP_REG: A64Opnd = X31; +pub const C_SP_STEP: A64Opnd = A64Opnd::UImm(16); /// Map Opnd to A64Opnd impl From<Opnd> for A64Opnd { fn from(opnd: Opnd) -> Self { match opnd { - Opnd::UImm(val) => uimm_opnd(val), - Opnd::Imm(val) => imm_opnd(val), + Opnd::UImm(value) => A64Opnd::new_uimm(value), + Opnd::Imm(value) => A64Opnd::new_imm(value), Opnd::Reg(reg) => A64Opnd::Reg(reg), - _ => panic!("unsupported arm64 operand type") + Opnd::Mem(Mem { base: MemBase::Reg(reg_no), num_bits, disp }) => { + A64Opnd::new_mem(num_bits, A64Opnd::Reg(A64Reg { num_bits, reg_no }), disp) + }, + Opnd::Mem(Mem { base: MemBase::InsnOut(_), .. }) => { + panic!("attempted to lower an Opnd::Mem with a MemBase::InsnOut base") + }, + Opnd::InsnOut { .. 
} => panic!("attempted to lower an Opnd::InsnOut"), + Opnd::None => panic!("attempted to lower an Opnd::None"), + Opnd::Value(_) => panic!("attempted to lower an Opnd::Value"), } } } @@ -43,39 +68,368 @@ impl Assembler } /// Split platform-specific instructions + /// The transformations done here are meant to make our lives simpler in later + /// stages of the compilation pipeline. + /// Here we may want to make sure that all instructions (except load and store) + /// have no memory operands. fn arm64_split(mut self) -> Assembler { - // The transformations done here are meant to make our lives simpler in later - // stages of the compilation pipeline. - // Here we may want to make sure that all instructions (except load and store) - // have no memory operands. + self.forward_pass(|asm, index, op, opnds, target| { + match op { + Op::Add | Op::Sub => { + // Check if one of the operands is a register. If it is, + // then we'll make that the first operand. + match (opnds[0], opnds[1]) { + (Opnd::Mem(_), Opnd::Mem(_)) => { + let opnd0 = asm.load(opnds[0]); + let opnd1 = asm.load(opnds[1]); + asm.push_insn(op, vec![opnd0, opnd1], target); + }, + (mem_opnd @ Opnd::Mem(_), other_opnd) | + (other_opnd, mem_opnd @ Opnd::Mem(_)) => { + let opnd0 = asm.load(mem_opnd); + asm.push_insn(op, vec![opnd0, other_opnd], target); + }, + _ => { + asm.push_insn(op, opnds, target); + } + } + }, + Op::IncrCounter => { + // Every operand to the IncrCounter instruction need to be a + // register once it gets there. So here we're going to load + // anything that isn't a register first. + let new_opnds: Vec<Opnd> = opnds.into_iter().map(|opnd| { + match opnd { + Opnd::Mem(_) | Opnd::Imm(_) | Opnd::UImm(_) => asm.load(opnd), + _ => opnd, + } + }).collect(); + + asm.incr_counter(new_opnds[0], new_opnds[1]); + }, + Op::Mov => { + // The value that is being moved must be either a register + // or an immediate that can be encoded as a bitmask + // immediate. 
Otherwise, we'll need to split the move into + // multiple instructions. + let value = match opnds[1] { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[1], + Opnd::Mem(_) | Opnd::Imm(_) => asm.load(opnds[1]), + Opnd::UImm(uimm) => { + if let Ok(encoded) = BitmaskImmediate::try_from(uimm) { + opnds[1] + } else { + asm.load(opnds[1]) + } + }, + _ => unreachable!() + }; + + /// If we're attempting to load into a memory operand, then + /// we'll switch over to the store instruction. Otherwise + /// we'll use the normal mov instruction. + match opnds[0] { + Opnd::Mem(_) => asm.store(opnds[0], value), + _ => asm.mov(opnds[0], value) + }; + }, + Op::Not => { + // The value that is being negated must be in a register, so + // if we get anything else we need to load it first. + let opnd0 = match opnds[0] { + Opnd::Mem(_) => asm.load(opnds[0]), + _ => opnds[0] + }; + + asm.not(opnd0); + }, + Op::Store => { + // The value being stored must be in a register, so if it's + // not already one we'll load it first. + let opnd1 = match opnds[1] { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[1], + _ => asm.load(opnds[1]) + }; - todo!(); + asm.store(opnds[0], opnd1); + }, + _ => { + asm.push_insn(op, opnds, target); + } + }; + }) } /// Emit platform-specific machine code /// Returns a list of GC offsets pub fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Vec<u32> { - // NOTE: dear Kevin, - // for arm, you may want to reserve 1 or 2 caller-save registers - // to use as scracth registers (during the last phase of the codegen) - // These registers will not be allocated to anything by the register - // allocator, they're just useful because arm is slightly trickier - // than x86 to generate code for. - // For example, if you want to jump far away, you may want to store - // the jump target address in a register first. - - todo!(); + /// Emit a conditional jump instruction to a specific target. This is + /// called when lowering any of the conditional jump instructions. 
+ fn emit_conditional_jump(cb: &mut CodeBlock, condition: Condition, target: Target) { + match target { + Target::CodePtr(dst_ptr) => { + let src_addr = cb.get_write_ptr().into_i64() + 4; + let dst_addr = dst_ptr.into_i64(); + let offset = dst_addr - src_addr; + + // If the jump offset fits into the conditional jump as an + // immediate value and it's properly aligned, then we can + // use the b.cond instruction directly. Otherwise, we need + // to load the address into a register and use the branch + // register instruction. + if bcond_offset_fits_bits(offset) { + bcond(cb, condition, A64Opnd::new_imm(dst_addr - src_addr)); + } else { + // If the condition is met, then we'll skip past the + // next instruction, put the address in a register, and + // jump to it. + bcond(cb, condition, A64Opnd::new_imm(4)); + + // If the offset fits into a direct jump, then we'll use + // that and the number of instructions will be shorter. + // Otherwise we'll use the branch register instruction. + if b_offset_fits_bits(offset) { + // If we get to this instruction, then the condition + // wasn't met, in which case we'll jump past the + // next instruction that performs the direct jump. + b(cb, A64Opnd::new_imm(4)); + + // Here we'll perform the direct jump to the target. + b(cb, A64Opnd::new_imm(offset / 4)); + } else { + // If we get to this instruction, then the condition + // wasn't met, in which case we'll jump past the + // next instruction that perform the direct jump. + b(cb, A64Opnd::new_imm(8)); + mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64)); + br(cb, X29); + } + } + }, + Target::Label(label_idx) => { + // Here we're going to save enough space for ourselves and + // then come back and write the instruction once we know the + // offset. We're going to assume we can fit into a single + // b.cond instruction. It will panic otherwise. 
+ cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { + bcond(cb, condition, A64Opnd::new_imm(dst_addr - src_addr)); + }); + }, + Target::FunPtr(_) => unreachable!() + }; + } + + // dbg!(&self.insns); + + // List of GC offsets + let mut gc_offsets: Vec<u32> = Vec::new(); + + // For each instruction + for insn in &self.insns { + match insn.op { + Op::Comment => { + if cfg!(feature = "asm_comments") { + cb.add_comment(&insn.text.as_ref().unwrap()); + } + }, + Op::Label => { + cb.write_label(insn.target.unwrap().unwrap_label_idx()); + }, + Op::Add => { + add(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Sub => { + sub(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::And => { + and(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Not => { + mvn(cb, insn.out.into(), insn.opnds[0].into()); + }, + Op::Store => { + // This order may be surprising but it is correct. The way + // the Arm64 assembler works, the register that is going to + // be stored is first and the address is second. However in + // our IR we have the address first and the register second. + stur(cb, insn.opnds[1].into(), insn.opnds[0].into()); + }, + Op::Load => { + mov(cb, insn.out.into(), insn.opnds[0].into()); + + // This assumes only load instructions can contain + // references to GC'd Value operands. If the value being + // loaded is a heap object, we'll report that back out to + // the gc_offsets list. 
+ if let Opnd::Value(val) = insn.opnds[0] { + if !val.special_const_p() { + // The pointer immediate is encoded as the last part of the mov written out + let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); + gc_offsets.push(ptr_offset); + } + } + }, + Op::Mov => { + mov(cb, insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Lea => { + ldur(cb, insn.out.into(), insn.opnds[0].into()); + }, + Op::CPush => { + add(cb, C_SP_REG, C_SP_REG, C_SP_STEP); + mov(cb, A64Opnd::new_mem(64, C_SP_REG, 0), insn.opnds[0].into()); + }, + Op::CPop => { + mov(cb, insn.out.into(), A64Opnd::new_mem(64, C_SP_REG, 0)); + sub(cb, C_SP_REG, C_SP_REG, C_SP_STEP); + }, + Op::CCall => { + // Temporary + assert!(insn.opnds.len() < C_ARG_REGS.len()); + + // For each operand + for (idx, opnd) in insn.opnds.iter().enumerate() { + mov(cb, C_ARG_REGS[idx], insn.opnds[idx].into()); + } + + let src_addr = cb.get_write_ptr().into_i64() + 4; + let dst_addr = insn.target.unwrap().unwrap_fun_ptr() as i64; + + // The offset between the two instructions in bytes. Note + // that when we encode this into a bl instruction, we'll + // divide by 4 because it accepts the number of instructions + // to jump over. + let offset = dst_addr - src_addr; + + // If the offset is short enough, then we'll use the branch + // link instruction. Otherwise, we'll move the destination + // and return address into appropriate registers and use the + // branch register instruction. 
+ if b_offset_fits_bits(offset) { + bl(cb, A64Opnd::new_imm(offset / 4)); + } else { + mov(cb, X30, A64Opnd::new_uimm(src_addr as u64)); + mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64)); + br(cb, X29); + } + }, + Op::CRet => { + // TODO: bias allocation towards return register + if insn.opnds[0] != Opnd::Reg(C_RET_REG) { + mov(cb, C_RET_OPND.into(), insn.opnds[0].into()); + } + + ret(cb, A64Opnd::None); + }, + Op::Cmp => { + cmp(cb, insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Test => { + tst(cb, insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::JmpOpnd => { + br(cb, insn.opnds[0].into()); + }, + Op::Jmp => { + match insn.target.unwrap() { + Target::CodePtr(dst_ptr) => { + let src_addr = cb.get_write_ptr().into_i64() + 4; + let dst_addr = dst_ptr.into_i64(); + + // The offset between the two instructions in bytes. + // Note that when we encode this into a b + // instruction, we'll divide by 4 because it accepts + // the number of instructions to jump over. + let offset = dst_addr - src_addr; + + // If the offset is short enough, then we'll use the + // branch instruction. Otherwise, we'll move the + // destination into a register and use the branch + // register instruction. + if b_offset_fits_bits(offset) { + b(cb, A64Opnd::new_imm(offset / 4)); + } else { + mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64)); + br(cb, X29); + } + }, + Target::Label(label_idx) => { + // Here we're going to save enough space for + // ourselves and then come back and write the + // instruction once we know the offset. We're going + // to assume we can fit into a single b instruction. + // It will panic otherwise. 
+ cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { + b(cb, A64Opnd::new_imm((dst_addr - src_addr) / 4)); + }); + }, + _ => unreachable!() + }; + }, + Op::Je => { + emit_conditional_jump(cb, Condition::EQ, insn.target.unwrap()); + }, + Op::Jbe => { + emit_conditional_jump(cb, Condition::LS, insn.target.unwrap()); + }, + Op::Jz => { + emit_conditional_jump(cb, Condition::EQ, insn.target.unwrap()); + }, + Op::Jnz => { + emit_conditional_jump(cb, Condition::NE, insn.target.unwrap()); + }, + Op::Jo => { + emit_conditional_jump(cb, Condition::VS, insn.target.unwrap()); + }, + Op::IncrCounter => { + ldaddal(cb, insn.opnds[0].into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Breakpoint => { + brk(cb, A64Opnd::None); + } + }; + } + + gc_offsets } /// Optimize and compile the stored instructions - pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>) + pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>) -> Vec<u32> { - self - .arm64_split() - .split_loads() - .alloc_regs(regs) - .arm64_emit(jit, cb) + let mut asm = self.arm64_split().split_loads().alloc_regs(regs); + + // Create label instances in the code block + for (idx, name) in asm.label_names.iter().enumerate() { + let label_idx = cb.new_label(name.to_string()); + assert!(label_idx == idx); + } + + let gc_offsets = asm.arm64_emit(cb); + cb.link_labels(); + + gc_offsets + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn setup_asm() -> (Assembler, CodeBlock) { + (Assembler::new(), CodeBlock::new_dummy(1024)) + } + + #[test] + fn test_emit_add() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.add(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_regs(&mut cb, vec![X3_REG]); + + let insns = cb.get_ptr(0).raw_ptr() as *const u32; + assert_eq!(0x8b010003, unsafe { *insns }); } } diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 4e8ed0b8a4..bdefe1c6bc 100644 --- 
a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -276,6 +276,13 @@ pub enum Target impl Target { + pub fn unwrap_fun_ptr(&self) -> *const u8 { + match self { + Target::FunPtr(ptr) => *ptr, + _ => unreachable!("trying to unwrap {:?} into fun ptr", self) + } + } + pub fn unwrap_label_idx(&self) -> usize { match self { Target::Label(idx) => *idx, diff --git a/yjit/src/backend/mod.rs b/yjit/src/backend/mod.rs index 0841c9ffa5..4794695094 100644 --- a/yjit/src/backend/mod.rs +++ b/yjit/src/backend/mod.rs @@ -1,3 +1,8 @@ +#[cfg(target_arch = "x86_64")] pub mod x86_64; + +#[cfg(target_arch = "aarch64")] +pub mod arm64; + pub mod ir; -mod tests;
\ No newline at end of file +mod tests; diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index f4e0d4f53a..19b5096a26 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -186,6 +186,9 @@ impl Assembler for (idx, opnd) in insn.opnds.iter().enumerate() { mov(cb, C_ARG_REGS[idx], insn.opnds[idx].into()); } + + let ptr = insn.target.unwrap().unwrap_fun_ptr(); + call_ptr(cb, RAX, ptr); }, Op::CRet => { |
