4 files changed, 148 insertions, 22 deletions
diff --git a/yjit/src/asm/arm64/arg/bitmask_imm.rs b/yjit/src/asm/arm64/arg/bitmask_imm.rs
index 54a6e6c344..d569828a24 100644
--- a/yjit/src/asm/arm64/arg/bitmask_imm.rs
+++ b/yjit/src/asm/arm64/arg/bitmask_imm.rs
@@ -72,13 +72,31 @@ impl TryFrom<u64> for BitmaskImmediate {
     }
 }
 
-impl From<BitmaskImmediate> for u32 {
+impl BitmaskImmediate {
+    /// Attempt to make a BitmaskImmediate for a 32 bit register.
+    /// The result has N==0, which is required for some 32-bit instructions.
+    /// Note that the exact same BitmaskImmediate produces different values
+    /// depending on the size of the target register.
+    pub fn new_32b_reg(value: u32) -> Result<Self, ()> {
+        // The same bit pattern replicated to u64
+        let value = value as u64;
+        let replicated: u64 = (value << 32) | value;
+        let converted = Self::try_from(replicated);
+        if let Ok(ref imm) = converted {
+            assert_eq!(0, imm.n);
+        }
+
+        converted
+    }
+}
+
+impl BitmaskImmediate {
     /// Encode a bitmask immediate into a 32-bit value.
-    fn from(bitmask: BitmaskImmediate) -> Self {
+    pub fn encode(self) -> u32 {
         0
-        | ((bitmask.n as u32) << 12)
-        | ((bitmask.immr as u32) << 6)
-        | (bitmask.imms as u32)
+        | ((self.n as u32) << 12)
+        | ((self.immr as u32) << 6)
+        | (self.imms as u32)
     }
 }
 
@@ -96,7 +114,7 @@ mod tests {
     #[test]
     fn test_negative() {
         let bitmask: BitmaskImmediate = (-9_i64 as u64).try_into().unwrap();
-        let encoded: u32 = bitmask.into();
+        let encoded: u32 = bitmask.encode();
         assert_eq!(7998, encoded);
     }
 
@@ -207,4 +225,31 @@ mod tests {
         let bitmask = BitmaskImmediate::try_from(u64::MAX);
         assert!(matches!(bitmask, Err(())));
     }
+
+    #[test]
+    fn test_all_valid_32b_pattern() {
+        let mut patterns = vec![];
+        for pattern_size in [2, 4, 8, 16, 32_u64] {
+            for ones_count in 1..pattern_size {
+                for rotation in 0..pattern_size {
+                    let ones = (1_u64 << ones_count) - 1;
+                    let rotated = (ones >> rotation) |
+                        ((ones & ((1 << rotation) - 1)) << (pattern_size - rotation));
+                    let mut replicated = rotated;
+                    let mut shift = pattern_size;
+                    while shift < 32 {
+                        replicated |= replicated << shift;
+                        shift *= 2;
+                    }
+                    let replicated: u32 = replicated.try_into().unwrap();
+                    assert!(BitmaskImmediate::new_32b_reg(replicated).is_ok());
+                    patterns.push(replicated);
+                }
+            }
+        }
+        patterns.sort();
+        patterns.dedup();
+        // Up to {size}-1 ones, and a total of {size} possible rotations.
+        assert_eq!(1*2 + 3*4 + 7*8 + 15*16 + 31*32, patterns.len());
+    }
 }
diff --git a/yjit/src/asm/arm64/inst/logical_imm.rs b/yjit/src/asm/arm64/inst/logical_imm.rs
index 73eec8b37c..b24916f8a5 100644
--- a/yjit/src/asm/arm64/inst/logical_imm.rs
+++ b/yjit/src/asm/arm64/inst/logical_imm.rs
@@ -86,7 +86,7 @@ const FAMILY: u32 = 0b1001;
 impl From<LogicalImm> for u32 {
     /// Convert an instruction into a 32-bit value.
     fn from(inst: LogicalImm) -> Self {
-        let imm: u32 = inst.imm.into();
+        let imm: u32 = inst.imm.encode();
 
         0
         | ((inst.sf as u32) << 31)
diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs
index 9d85705ff8..e5697e26fb 100644
--- a/yjit/src/asm/arm64/mod.rs
+++ b/yjit/src/asm/arm64/mod.rs
@@ -135,8 +135,13 @@ pub fn and(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
         },
         (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
             assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+            let bitmask_imm = if rd.num_bits == 32 {
+                BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+            } else {
+                imm.try_into()
+            }.unwrap();
 
-            LogicalImm::and(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into()
+            LogicalImm::and(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into()
         },
         _ => panic!("Invalid operand combination to and instruction."),
     };
@@ -157,8 +162,13 @@ pub fn ands(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
         },
         (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
             assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+            let bitmask_imm = if rd.num_bits == 32 {
+                BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+            } else {
+                imm.try_into()
+            }.unwrap();
 
-            LogicalImm::ands(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into()
+            LogicalImm::ands(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into()
         },
         _ => panic!("Invalid operand combination to ands instruction."),
     };
@@ -305,8 +315,13 @@ pub fn eor(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
         },
         (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
             assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+            let bitmask_imm = if rd.num_bits == 32 {
+                BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+            } else {
+                imm.try_into()
+            }.unwrap();
 
-            LogicalImm::eor(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into()
+            LogicalImm::eor(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into()
         },
         _ => panic!("Invalid operand combination to eor instruction."),
     };
@@ -604,7 +619,13 @@ pub fn mov(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) {
             LogicalReg::mov(rd.reg_no, XZR_REG.reg_no, rd.num_bits).into()
         },
         (A64Opnd::Reg(rd), A64Opnd::UImm(imm)) => {
-            LogicalImm::mov(rd.reg_no, imm.try_into().unwrap(), rd.num_bits).into()
+            let bitmask_imm = if rd.num_bits == 32 {
+                BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+            } else {
+                imm.try_into()
+            }.unwrap();
+
+            LogicalImm::mov(rd.reg_no, bitmask_imm, rd.num_bits).into()
         },
         _ => panic!("Invalid operand combination to mov instruction")
     };
@@ -712,8 +733,13 @@ pub fn orr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
         },
         (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
             assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+            let bitmask_imm = if rd.num_bits == 32 {
+                BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+            } else {
+                imm.try_into()
+            }.unwrap();
 
-            LogicalImm::orr(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into()
+            LogicalImm::orr(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into()
         },
         _ => panic!("Invalid operand combination to orr instruction."),
     };
@@ -995,7 +1021,13 @@ pub fn tst(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) {
             LogicalReg::tst(rn.reg_no, rm.reg_no, rn.num_bits).into()
         },
         (A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
-            LogicalImm::tst(rn.reg_no, imm.try_into().unwrap(), rn.num_bits).into()
+            let bitmask_imm = if rn.num_bits == 32 {
+                BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+            } else {
+                imm.try_into()
+            }.unwrap();
+
+            LogicalImm::tst(rn.reg_no, bitmask_imm, rn.num_bits).into()
         },
         _ => panic!("Invalid operand combination to tst instruction."),
     };
@@ -1098,6 +1130,11 @@ mod tests {
     }
 
     #[test]
+    fn test_and_32b_immedaite() {
+        check_bytes("404c0012", |cb| and(cb, W0, W2, A64Opnd::new_uimm(0xfffff)));
+    }
+
+    #[test]
     fn test_ands_register() {
         check_bytes("200002ea", |cb| ands(cb, X0, X1, X2));
     }
@@ -1208,6 +1245,11 @@ mod tests {
     }
 
     #[test]
+    fn test_eor_32b_immediate() {
+        check_bytes("29040152", |cb| eor(cb, W9, W1, A64Opnd::new_uimm(0x80000001)));
+    }
+
+    #[test]
     fn test_ldaddal() {
         check_bytes("8b01eaf8", |cb| ldaddal(cb, X10, X11, X12));
     }
@@ -1303,6 +1345,10 @@ mod tests {
     }
 
     #[test]
+    fn test_mov_32b_immediate() {
+        check_bytes("ea070132", |cb| mov(cb, W10, A64Opnd::new_uimm(0x80000001)));
+    }
+    #[test]
     fn test_mov_into_sp() {
         check_bytes("1f000091", |cb| mov(cb, X31, X0));
     }
@@ -1358,6 +1404,11 @@ mod tests {
     }
 
     #[test]
+    fn test_orr_32b_immediate() {
+        check_bytes("6a010032", |cb| orr(cb, W10, W11, A64Opnd::new_uimm(1)));
+    }
+
+    #[test]
     fn test_ret_none() {
         check_bytes("c0035fd6", |cb| ret(cb, A64Opnd::None));
     }
@@ -1481,4 +1532,9 @@ mod tests {
     fn test_tst_immediate() {
         check_bytes("3f0840f2", |cb| tst(cb, X1, A64Opnd::new_uimm(7)));
     }
+
+    #[test]
+    fn test_tst_32b_immediate() {
+        check_bytes("1f3c0072", |cb| tst(cb, W0, A64Opnd::new_uimm(0xffff)));
+    }
 }
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index 692e4371e2..ee4f96c0d8 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -157,24 +157,31 @@ impl Assembler
         /// Operands that take the place of bitmask immediates must follow a
         /// certain encoding. In this function we ensure that those operands
         /// do follow that encoding, and if they don't then we load them first.
-        fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd {
+        fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd, dest_num_bits: u8) -> Opnd {
             match opnd {
                 Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd,
                 Opnd::Mem(_) => split_load_operand(asm, opnd),
                 Opnd::Imm(imm) => {
                     if imm <= 0 {
                         asm.load(opnd)
-                    } else if BitmaskImmediate::try_from(imm as u64).is_ok() {
+                    } else if (dest_num_bits == 64 &&
+                                BitmaskImmediate::try_from(imm as u64).is_ok()) ||
+                            (dest_num_bits == 32 &&
+                                u32::try_from(imm).is_ok() &&
+                                BitmaskImmediate::new_32b_reg(imm as u32).is_ok()) {
                         Opnd::UImm(imm as u64)
                     } else {
-                        asm.load(opnd)
+                        asm.load(opnd).with_num_bits(dest_num_bits).unwrap()
                     }
                 },
                 Opnd::UImm(uimm) => {
-                    if BitmaskImmediate::try_from(uimm).is_ok() {
+                    if (dest_num_bits == 64 && BitmaskImmediate::try_from(uimm).is_ok()) ||
+                        (dest_num_bits == 32 &&
+                            u32::try_from(uimm).is_ok() &&
+                            BitmaskImmediate::new_32b_reg(uimm as u32).is_ok()) {
                         opnd
                     } else {
-                        asm.load(opnd)
+                        asm.load(opnd).with_num_bits(dest_num_bits).unwrap()
                     }
                 },
                 Opnd::None | Opnd::Value(_) => unreachable!()
@@ -208,12 +215,12 @@ impl Assembler
                 },
                 (reg_opnd @ Opnd::Reg(_), other_opnd) |
                 (other_opnd, reg_opnd @ Opnd::Reg(_)) => {
-                    let opnd1 = split_bitmask_immediate(asm, other_opnd);
+                    let opnd1 = split_bitmask_immediate(asm, other_opnd, reg_opnd.rm_num_bits());
                     (reg_opnd, opnd1)
                 },
                 _ => {
                     let opnd0 = split_load_operand(asm, opnd0);
-                    let opnd1 = split_bitmask_immediate(asm, opnd1);
+                    let opnd1 = split_bitmask_immediate(asm, opnd1, opnd0.rm_num_bits());
                     (opnd0, opnd1)
                 }
             }
@@ -449,7 +456,7 @@ impl Assembler
                         // register or an immediate that can be encoded as a
                         // bitmask immediate. Otherwise, we'll need to split the
                         // move into multiple instructions.
-                        _ => split_bitmask_immediate(asm, src)
+                        _ => split_bitmask_immediate(asm, src, dest.rm_num_bits())
                     };
 
                     // If we're attempting to load into a memory operand, then
@@ -508,7 +515,7 @@ impl Assembler
                     // unsigned immediate that can be encoded as a bitmask
                     // immediate. If it's not one of those, we'll need to load
                     // it first.
-                    let opnd1 = split_bitmask_immediate(asm, right);
+                    let opnd1 = split_bitmask_immediate(asm, right, opnd0.rm_num_bits());
                     asm.test(opnd0, opnd1);
                 },
                 _ => {
@@ -1187,6 +1194,24 @@ mod tests {
     }
 
     #[test]
+    fn test_emit_test_32b_reg_not_bitmask_imm() {
+        let (mut asm, mut cb) = setup_asm();
+        let w0 = Opnd::Reg(X0_REG).with_num_bits(32).unwrap();
+        asm.test(w0, Opnd::UImm(u32::MAX.into()));
+        // All ones is not encodable with a bitmask immediate,
+        // so this needs one register
+        asm.compile_with_num_regs(&mut cb, 1);
+    }
+
+    #[test]
+    fn test_emit_test_32b_reg_bitmask_imm() {
+        let (mut asm, mut cb) = setup_asm();
+        let w0 = Opnd::Reg(X0_REG).with_num_bits(32).unwrap();
+        asm.test(w0, Opnd::UImm(0x80000001));
+        asm.compile_with_num_regs(&mut cb, 0);
+    }
+
+    #[test]
     fn test_emit_or() {
         let (mut asm, mut cb) = setup_asm();