diff options
author | Alan Wu <XrXr@users.noreply.github.com> | 2021-11-05 15:44:29 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-11-05 15:44:29 -0400 |
commit | 91a9062626733c7d11ea4795bd1957a21f2adec6 (patch) | |
tree | e844b4b0c7c8c52e26701539b0fba103a58425f6 /yjit_asm.c | |
parent | 82ae9b092cf51062c49b95e81ad184e1dea0df1e (diff) |
YJIT: use shorter encoding for mov(r64,imm) when unambiguous (#5081)
* YJIT: use shorter encoding for mov(r64,imm) when unambiguous
Previously, for small constants such as `mov(RAX, imm_opnd(Qundef))`,
we emit an instruction with an 8-byte immediate. This form commonly
gets the `movabs` mnemonic.
In 64-bit mode, 32-bit operands get zero extended to 64-bit to fill the
register, so when the immediate is small enough, we can save 4 bytes by
using the `mov` variant that takes a 32-bit immediate and does a zero
extension.
Not implemented in this change: there is an imm32 variant of `mov` that
does sign extension, which we could also use. When the constant is negative, we
fall back to the `movabs` form.
In railsbench, this change yields roughly a 12% code size reduction for
the outlined block.
Co-authored-by: Jemma Issroff <jemmaissroff@gmail.com>
* [ci skip] comment edit. Please squash.
Co-authored-by: Jemma Issroff <jemmaissroff@gmail.com>
Notes
Notes:
Merged-By: maximecb <maximecb@ruby-lang.org>
Diffstat (limited to 'yjit_asm.c')
-rw-r--r-- | yjit_asm.c | 34 |
1 files changed, 28 insertions, 6 deletions
diff --git a/yjit_asm.c b/yjit_asm.c index 49844145cb..2ae50295a9 100644 --- a/yjit_asm.c +++ b/yjit_asm.c @@ -1259,6 +1259,15 @@ void lea(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) cb_write_rm(cb, false, true, dst, src, 0xFF, 1, 0x8D); } +// Does this number fit in 32 bits and stays the same if you zero extend it to 64 bit? +// If the sign bit is clear, sign extension and zero extension yield the same +// result. +static bool +zero_extendable_32bit(uint64_t number) +{ + return number <= UINT32_MAX && (number & (1ull << 31ull)) == 0; +} + /// mov - Data move operation void mov(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { @@ -1275,14 +1284,27 @@ void mov(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) unsig_imm_size(src.as.imm) <= dst.num_bits ); - if (dst.num_bits == 16) - cb_write_byte(cb, 0x66); - if (rex_needed(dst) || dst.num_bits == 64) - cb_write_rex(cb, dst.num_bits == 64, 0, 0, dst.as.reg.reg_no); + // In case the source immediate could be zero extended to be 64 + // bit, we can use the 32-bit operands version of the instruction. + // For example, we can turn mov(rax, 0x34) into the equivalent + // mov(eax, 0x34). + if (dst.num_bits == 64 && zero_extendable_32bit(src.as.unsig_imm)) { + if (rex_needed(dst)) + cb_write_rex(cb, false, 0, 0, dst.as.reg.reg_no); + cb_write_opcode(cb, 0xB8, dst); + cb_write_int(cb, src.as.imm, 32); + } + else { + if (dst.num_bits == 16) + cb_write_byte(cb, 0x66); + + if (rex_needed(dst) || dst.num_bits == 64) + cb_write_rex(cb, dst.num_bits == 64, 0, 0, dst.as.reg.reg_no); - cb_write_opcode(cb, (dst.num_bits == 8)? 0xB0:0xB8, dst); + cb_write_opcode(cb, (dst.num_bits == 8)? 0xB0:0xB8, dst); - cb_write_int(cb, src.as.imm, dst.num_bits); + cb_write_int(cb, src.as.imm, dst.num_bits); + } } // M + Imm |