author    Alan Wu <XrXr@users.noreply.github.com>  2022-10-06 18:41:38 -0400
committer GitHub <noreply@github.com>  2022-10-06 18:41:38 -0400
commit    43e87c7e8ab5cbf253c8f11daf9c8ad4bc3d7b3e (patch)
tree      7696c3ee5c3e54abe7bfaccf763645291e8d909b /yjit
parent    fa2e1b67e548cb5653b66909a2bc3d6b9eae98e3 (diff)
YJIT: fix ARM64 bitmask encoding for 32 bit registers (#6503)
For the 32-bit variants of logical instructions such as AND, there is a constraint that the N part of the bitmask immediate must be 0. We weren't respecting this condition previously and were silently emitting undefined instructions. Check for this condition in the assembler, and tweak the backend to correctly detect whether a number can be encoded as an immediate in a 32-bit logical instruction. Due to the nature of the immediate encoding, the same numeric value encodes differently depending on the size of the register the instruction operates on. We currently don't have cases where we use 32-bit immediates, but we ran into this encoding issue during development.
Notes: Merged-By: maximecb <maximecb@ruby-lang.org>
Diffstat (limited to 'yjit')
-rw-r--r--  yjit/src/asm/arm64/arg/bitmask_imm.rs   57
-rw-r--r--  yjit/src/asm/arm64/inst/logical_imm.rs   2
-rw-r--r--  yjit/src/asm/arm64/mod.rs                68
-rw-r--r--  yjit/src/backend/arm64/mod.rs            43
4 files changed, 148 insertions(+), 22 deletions(-)
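
To make the size dependence mentioned in the commit message concrete, here is a small editorial sketch (not part of the commit) using the BitmaskImmediate API as it looks after this change: the value 1 needs N == 1 when destined for a 64-bit register, but N == 0 when destined for a 32-bit one.

    // Editorial sketch, not part of the commit; uses BitmaskImmediate from
    // yjit/src/asm/arm64/arg/bitmask_imm.rs as changed below.
    let imm_x = BitmaskImmediate::try_from(1_u64).unwrap(); // 64-bit element size
    let imm_w = BitmaskImmediate::new_32b_reg(1).unwrap();  // 32-bit element size
    assert_eq!(0b1_000000_000000, imm_x.encode()); // N == 1 (0x1000)
    assert_eq!(0b0_000000_000000, imm_w.encode()); // N == 0 (0x0000)
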
diff --git a/yjit/src/asm/arm64/arg/bitmask_imm.rs b/yjit/src/asm/arm64/arg/bitmask_imm.rs
index 54a6e6c344..d569828a24 100644
--- a/yjit/src/asm/arm64/arg/bitmask_imm.rs
+++ b/yjit/src/asm/arm64/arg/bitmask_imm.rs
@@ -72,13 +72,31 @@ impl TryFrom<u64> for BitmaskImmediate {
}
}
-impl From<BitmaskImmediate> for u32 {
+impl BitmaskImmediate {
+ /// Attempt to make a BitmaskImmediate for a 32 bit register.
+ /// The result has N==0, which is required for some 32-bit instructions.
+ /// Note that the exact same BitmaskImmediate produces different values
+ /// depending on the size of the target register.
+ pub fn new_32b_reg(value: u32) -> Result<Self, ()> {
+ // The same bit pattern replicated to u64
+ let value = value as u64;
+ let replicated: u64 = (value << 32) | value;
+ let converted = Self::try_from(replicated);
+ if let Ok(ref imm) = converted {
+ assert_eq!(0, imm.n);
+ }
+
+ converted
+ }
+}
+
+impl BitmaskImmediate {
/// Encode a bitmask immediate into a 32-bit value.
- fn from(bitmask: BitmaskImmediate) -> Self {
+ pub fn encode(self) -> u32 {
0
- | ((bitmask.n as u32) << 12)
- | ((bitmask.immr as u32) << 6)
- | (bitmask.imms as u32)
+ | ((self.n as u32) << 12)
+ | ((self.immr as u32) << 6)
+ | (self.imms as u32)
}
}
@@ -96,7 +114,7 @@ mod tests {
#[test]
fn test_negative() {
let bitmask: BitmaskImmediate = (-9_i64 as u64).try_into().unwrap();
- let encoded: u32 = bitmask.into();
+ let encoded: u32 = bitmask.encode();
assert_eq!(7998, encoded);
}
@@ -207,4 +225,31 @@ mod tests {
let bitmask = BitmaskImmediate::try_from(u64::MAX);
assert!(matches!(bitmask, Err(())));
}
+
+ #[test]
+ fn test_all_valid_32b_pattern() {
+ let mut patterns = vec![];
+ for pattern_size in [2, 4, 8, 16, 32_u64] {
+ for ones_count in 1..pattern_size {
+ for rotation in 0..pattern_size {
+ let ones = (1_u64 << ones_count) - 1;
+ let rotated = (ones >> rotation) |
+ ((ones & ((1 << rotation) - 1)) << (pattern_size - rotation));
+ let mut replicated = rotated;
+ let mut shift = pattern_size;
+ while shift < 32 {
+ replicated |= replicated << shift;
+ shift *= 2;
+ }
+ let replicated: u32 = replicated.try_into().unwrap();
+ assert!(BitmaskImmediate::new_32b_reg(replicated).is_ok());
+ patterns.push(replicated);
+ }
+ }
+ }
+ patterns.sort();
+ patterns.dedup();
+ // Up to {size}-1 ones, and a total of {size} possible rotations.
+ assert_eq!(1*2 + 3*4 + 7*8 + 15*16 + 31*32, patterns.len());
+ }
}
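
As a cross-check of the count asserted in test_all_valid_32b_pattern above (editorial arithmetic, not part of the commit): each element size s in {2, 4, 8, 16, 32} allows s - 1 run lengths and s rotations, giving 1*2 + 3*4 + 7*8 + 15*16 + 31*32 = 2 + 12 + 56 + 240 + 992 = 1302 distinct valid 32-bit patterns (all-ones is excluded).
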
diff --git a/yjit/src/asm/arm64/inst/logical_imm.rs b/yjit/src/asm/arm64/inst/logical_imm.rs
index 73eec8b37c..b24916f8a5 100644
--- a/yjit/src/asm/arm64/inst/logical_imm.rs
+++ b/yjit/src/asm/arm64/inst/logical_imm.rs
@@ -86,7 +86,7 @@ const FAMILY: u32 = 0b1001;
impl From<LogicalImm> for u32 {
/// Convert an instruction into a 32-bit value.
fn from(inst: LogicalImm) -> Self {
- let imm: u32 = inst.imm.into();
+ let imm: u32 = inst.imm.encode();
0
| ((inst.sf as u32) << 31)
diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs
index 9d85705ff8..e5697e26fb 100644
--- a/yjit/src/asm/arm64/mod.rs
+++ b/yjit/src/asm/arm64/mod.rs
@@ -135,8 +135,13 @@ pub fn and(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
},
(A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+ let bitmask_imm = if rd.num_bits == 32 {
+ BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+ } else {
+ imm.try_into()
+ }.unwrap();
- LogicalImm::and(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into()
+ LogicalImm::and(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into()
},
_ => panic!("Invalid operand combination to and instruction."),
};
@@ -157,8 +162,13 @@ pub fn ands(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
},
(A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+ let bitmask_imm = if rd.num_bits == 32 {
+ BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+ } else {
+ imm.try_into()
+ }.unwrap();
- LogicalImm::ands(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into()
+ LogicalImm::ands(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into()
},
_ => panic!("Invalid operand combination to ands instruction."),
};
@@ -305,8 +315,13 @@ pub fn eor(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
},
(A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+ let bitmask_imm = if rd.num_bits == 32 {
+ BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+ } else {
+ imm.try_into()
+ }.unwrap();
- LogicalImm::eor(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into()
+ LogicalImm::eor(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into()
},
_ => panic!("Invalid operand combination to eor instruction."),
};
@@ -604,7 +619,13 @@ pub fn mov(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) {
LogicalReg::mov(rd.reg_no, XZR_REG.reg_no, rd.num_bits).into()
},
(A64Opnd::Reg(rd), A64Opnd::UImm(imm)) => {
- LogicalImm::mov(rd.reg_no, imm.try_into().unwrap(), rd.num_bits).into()
+ let bitmask_imm = if rd.num_bits == 32 {
+ BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+ } else {
+ imm.try_into()
+ }.unwrap();
+
+ LogicalImm::mov(rd.reg_no, bitmask_imm, rd.num_bits).into()
},
_ => panic!("Invalid operand combination to mov instruction")
};
@@ -712,8 +733,13 @@ pub fn orr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
},
(A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+ let bitmask_imm = if rd.num_bits == 32 {
+ BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+ } else {
+ imm.try_into()
+ }.unwrap();
- LogicalImm::orr(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into()
+ LogicalImm::orr(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into()
},
_ => panic!("Invalid operand combination to orr instruction."),
};
@@ -995,7 +1021,13 @@ pub fn tst(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) {
LogicalReg::tst(rn.reg_no, rm.reg_no, rn.num_bits).into()
},
(A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
- LogicalImm::tst(rn.reg_no, imm.try_into().unwrap(), rn.num_bits).into()
+ let bitmask_imm = if rn.num_bits == 32 {
+ BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+ } else {
+ imm.try_into()
+ }.unwrap();
+
+ LogicalImm::tst(rn.reg_no, bitmask_imm, rn.num_bits).into()
},
_ => panic!("Invalid operand combination to tst instruction."),
};
@@ -1098,6 +1130,11 @@ mod tests {
}
#[test]
+ fn test_and_32b_immediate() {
+ check_bytes("404c0012", |cb| and(cb, W0, W2, A64Opnd::new_uimm(0xfffff)));
+ }
+
+ #[test]
fn test_ands_register() {
check_bytes("200002ea", |cb| ands(cb, X0, X1, X2));
}
@@ -1208,6 +1245,11 @@ mod tests {
}
#[test]
+ fn test_eor_32b_immediate() {
+ check_bytes("29040152", |cb| eor(cb, W9, W1, A64Opnd::new_uimm(0x80000001)));
+ }
+
+ #[test]
fn test_ldaddal() {
check_bytes("8b01eaf8", |cb| ldaddal(cb, X10, X11, X12));
}
@@ -1303,6 +1345,10 @@ mod tests {
}
#[test]
+ fn test_mov_32b_immediate() {
+ check_bytes("ea070132", |cb| mov(cb, W10, A64Opnd::new_uimm(0x80000001)));
+ }
+ #[test]
fn test_mov_into_sp() {
check_bytes("1f000091", |cb| mov(cb, X31, X0));
}
@@ -1358,6 +1404,11 @@ mod tests {
}
#[test]
+ fn test_orr_32b_immediate() {
+ check_bytes("6a010032", |cb| orr(cb, W10, W11, A64Opnd::new_uimm(1)));
+ }
+
+ #[test]
fn test_ret_none() {
check_bytes("c0035fd6", |cb| ret(cb, A64Opnd::None));
}
@@ -1481,4 +1532,9 @@ mod tests {
fn test_tst_immediate() {
check_bytes("3f0840f2", |cb| tst(cb, X1, A64Opnd::new_uimm(7)));
}
+
+ #[test]
+ fn test_tst_32b_immediate() {
+ check_bytes("1f3c0072", |cb| tst(cb, W0, A64Opnd::new_uimm(0xffff)));
+ }
}
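
The same width-dependent selection is repeated in and, ands, eor, mov, orr, and tst above. As a purely illustrative sketch (no such helper exists in this commit), it could be factored into one function:

    // Hypothetical helper, not in this commit: choose the bitmask constructor
    // based on the destination register width.
    fn bitmask_for_width(imm: u64, num_bits: u8) -> Result<BitmaskImmediate, ()> {
        if num_bits == 32 {
            BitmaskImmediate::new_32b_reg(u32::try_from(imm).map_err(|_| ())?)
        } else {
            BitmaskImmediate::try_from(imm)
        }
    }
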
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index 692e4371e2..ee4f96c0d8 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -157,24 +157,31 @@ impl Assembler
/// Operands that take the place of bitmask immediates must follow a
/// certain encoding. In this function we ensure that those operands
/// do follow that encoding, and if they don't then we load them first.
- fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd {
+ fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd, dest_num_bits: u8) -> Opnd {
match opnd {
Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd,
Opnd::Mem(_) => split_load_operand(asm, opnd),
Opnd::Imm(imm) => {
if imm <= 0 {
asm.load(opnd)
- } else if BitmaskImmediate::try_from(imm as u64).is_ok() {
+ } else if (dest_num_bits == 64 &&
+ BitmaskImmediate::try_from(imm as u64).is_ok()) ||
+ (dest_num_bits == 32 &&
+ u32::try_from(imm).is_ok() &&
+ BitmaskImmediate::new_32b_reg(imm as u32).is_ok()) {
Opnd::UImm(imm as u64)
} else {
- asm.load(opnd)
+ asm.load(opnd).with_num_bits(dest_num_bits).unwrap()
}
},
Opnd::UImm(uimm) => {
- if BitmaskImmediate::try_from(uimm).is_ok() {
+ if (dest_num_bits == 64 && BitmaskImmediate::try_from(uimm).is_ok()) ||
+ (dest_num_bits == 32 &&
+ u32::try_from(uimm).is_ok() &&
+ BitmaskImmediate::new_32b_reg(uimm as u32).is_ok()) {
opnd
} else {
- asm.load(opnd)
+ asm.load(opnd).with_num_bits(dest_num_bits).unwrap()
}
},
Opnd::None | Opnd::Value(_) => unreachable!()
@@ -208,12 +215,12 @@ impl Assembler
},
(reg_opnd @ Opnd::Reg(_), other_opnd) |
(other_opnd, reg_opnd @ Opnd::Reg(_)) => {
- let opnd1 = split_bitmask_immediate(asm, other_opnd);
+ let opnd1 = split_bitmask_immediate(asm, other_opnd, reg_opnd.rm_num_bits());
(reg_opnd, opnd1)
},
_ => {
let opnd0 = split_load_operand(asm, opnd0);
- let opnd1 = split_bitmask_immediate(asm, opnd1);
+ let opnd1 = split_bitmask_immediate(asm, opnd1, opnd0.rm_num_bits());
(opnd0, opnd1)
}
}
@@ -449,7 +456,7 @@ impl Assembler
// register or an immediate that can be encoded as a
// bitmask immediate. Otherwise, we'll need to split the
// move into multiple instructions.
- _ => split_bitmask_immediate(asm, src)
+ _ => split_bitmask_immediate(asm, src, dest.rm_num_bits())
};
// If we're attempting to load into a memory operand, then
@@ -508,7 +515,7 @@ impl Assembler
// unsigned immediate that can be encoded as a bitmask
// immediate. If it's not one of those, we'll need to load
// it first.
- let opnd1 = split_bitmask_immediate(asm, right);
+ let opnd1 = split_bitmask_immediate(asm, right, opnd0.rm_num_bits());
asm.test(opnd0, opnd1);
},
_ => {
@@ -1187,6 +1194,24 @@ mod tests {
}
#[test]
+ fn test_emit_test_32b_reg_not_bitmask_imm() {
+ let (mut asm, mut cb) = setup_asm();
+ let w0 = Opnd::Reg(X0_REG).with_num_bits(32).unwrap();
+ asm.test(w0, Opnd::UImm(u32::MAX.into()));
+ // All ones is not encodable with a bitmask immediate,
+ // so this needs one register
+ asm.compile_with_num_regs(&mut cb, 1);
+ }
+
+ #[test]
+ fn test_emit_test_32b_reg_bitmask_imm() {
+ let (mut asm, mut cb) = setup_asm();
+ let w0 = Opnd::Reg(X0_REG).with_num_bits(32).unwrap();
+ asm.test(w0, Opnd::UImm(0x80000001));
+ asm.compile_with_num_regs(&mut cb, 0);
+ }
+
+ #[test]
fn test_emit_or() {
let (mut asm, mut cb) = setup_asm();