Diffstat (limited to 'yjit')
-rw-r--r--  yjit/Cargo.lock | 8
-rw-r--r--  yjit/Cargo.toml | 44
-rw-r--r--  yjit/bindgen/Cargo.lock | 358
-rw-r--r--  yjit/bindgen/Cargo.toml | 6
-rw-r--r--  yjit/bindgen/src/main.rs | 188
-rw-r--r--  yjit/not_gmake.mk | 2
-rw-r--r--  yjit/src/asm/arm64/arg/bitmask_imm.rs | 2
-rw-r--r--  yjit/src/asm/arm64/arg/sys_reg.rs | 4
-rw-r--r--  yjit/src/asm/arm64/inst/atomic.rs | 4
-rw-r--r--  yjit/src/asm/arm64/inst/branch.rs | 8
-rw-r--r--  yjit/src/asm/arm64/inst/branch_cond.rs | 4
-rw-r--r--  yjit/src/asm/arm64/inst/breakpoint.rs | 4
-rw-r--r--  yjit/src/asm/arm64/inst/call.rs | 6
-rw-r--r--  yjit/src/asm/arm64/inst/conditional.rs | 4
-rw-r--r--  yjit/src/asm/arm64/inst/data_imm.rs | 12
-rw-r--r--  yjit/src/asm/arm64/inst/data_reg.rs | 12
-rw-r--r--  yjit/src/asm/arm64/inst/halfword_imm.rs | 14
-rw-r--r--  yjit/src/asm/arm64/inst/load_literal.rs | 4
-rw-r--r--  yjit/src/asm/arm64/inst/load_register.rs | 4
-rw-r--r--  yjit/src/asm/arm64/inst/load_store.rs | 22
-rw-r--r--  yjit/src/asm/arm64/inst/load_store_exclusive.rs | 6
-rw-r--r--  yjit/src/asm/arm64/inst/logical_imm.rs | 14
-rw-r--r--  yjit/src/asm/arm64/inst/logical_reg.rs | 18
-rw-r--r--  yjit/src/asm/arm64/inst/madd.rs | 2
-rw-r--r--  yjit/src/asm/arm64/inst/mov.rs | 6
-rw-r--r--  yjit/src/asm/arm64/inst/nop.rs | 2
-rw-r--r--  yjit/src/asm/arm64/inst/pc_rel.rs | 6
-rw-r--r--  yjit/src/asm/arm64/inst/reg_pair.rs | 26
-rw-r--r--  yjit/src/asm/arm64/inst/sbfm.rs | 6
-rw-r--r--  yjit/src/asm/arm64/inst/shift_imm.rs | 6
-rw-r--r--  yjit/src/asm/arm64/inst/smulh.rs | 2
-rw-r--r--  yjit/src/asm/arm64/inst/sys_reg.rs | 6
-rw-r--r--  yjit/src/asm/arm64/inst/test_bit.rs | 6
-rw-r--r--  yjit/src/asm/arm64/mod.rs | 70
-rw-r--r--  yjit/src/asm/mod.rs | 91
-rw-r--r--  yjit/src/asm/x86_64/mod.rs | 11
-rw-r--r--  yjit/src/asm/x86_64/tests.rs | 1
-rw-r--r--  yjit/src/backend/arm64/mod.rs | 183
-rw-r--r--  yjit/src/backend/ir.rs | 473
-rw-r--r--  yjit/src/backend/tests.rs | 19
-rw-r--r--  yjit/src/backend/x86_64/mod.rs | 93
-rw-r--r--  yjit/src/codegen.rs | 5657
-rw-r--r--  yjit/src/core.rs | 2021
-rw-r--r--  yjit/src/cruby.rs | 153
-rw-r--r--  yjit/src/cruby_bindings.inc.rs | 809
-rw-r--r--  yjit/src/disasm.rs | 159
-rw-r--r--  yjit/src/invariants.rs | 171
-rw-r--r--  yjit/src/lib.rs | 14
-rw-r--r--  yjit/src/log.rs | 179
-rw-r--r--  yjit/src/options.rs | 205
-rw-r--r--  yjit/src/stats.rs | 497
-rw-r--r--  yjit/src/utils.rs | 28
-rw-r--r--  yjit/src/virtualmem.rs | 119
-rw-r--r--  yjit/src/yjit.rs | 90
-rw-r--r--  yjit/yjit.mk | 93
55 files changed, 8069 insertions, 3883 deletions
diff --git a/yjit/Cargo.lock b/yjit/Cargo.lock
index e9a59cb771..8b6ac39806 100644
--- a/yjit/Cargo.lock
+++ b/yjit/Cargo.lock
@@ -4,9 +4,9 @@ version = 3
[[package]]
name = "capstone"
-version = "0.10.0"
+version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "66b5d1f14c3539b6ff22fcb602fea5f1c4416148c8b7965a2e74860aa169b7b5"
+checksum = "015ef5d5ca1743e3f94af9509ba6bd2886523cfee46e48d15c2ef5216fd4ac9a"
dependencies = [
"capstone-sys",
"libc",
@@ -14,9 +14,9 @@ dependencies = [
[[package]]
name = "capstone-sys"
-version = "0.14.0"
+version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df653a22d0ad34b0d91cc92a6289d96e44aac1c9a96250a094c9aeec4a91084f"
+checksum = "2267cb8d16a1e4197863ec4284ffd1aec26fe7e57c58af46b02590a0235809a0"
dependencies = [
"cc",
"libc",
diff --git a/yjit/Cargo.toml b/yjit/Cargo.toml
index 2194402cdd..d3124e608c 100644
--- a/yjit/Cargo.toml
+++ b/yjit/Cargo.toml
@@ -9,40 +9,20 @@ edition = "2021" # Rust 2021 edition to compile with
rust-version = "1.58.0" # Minimally supported rust version
publish = false # Don't publish to crates.io
-[lib]
-crate-type = ["staticlib"]
-
[dependencies]
-# No required dependencies to simplify build process. TODO: Link to yet to be
-# written rationale. Optional For development and testing purposes
-capstone = { version = "0.10.0", optional = true }
+# No required dependencies to simplify build process.
+# Optional For development and testing purposes.
+capstone = { version = "0.13.0", optional = true }
+jit = { version = "0.1.0", path = "../jit" }
-[features]
# NOTE: Development builds select a set of these via configure.ac
# For debugging, `make V=1` shows exact cargo invocation.
+[features]
+# Support --yjit-dump-disasm and RubyVM::YJIT.disasm using libcapstone.
disasm = ["capstone"]
-stats = []
-
-[profile.dev]
-opt-level = 0
-debug = true
-debug-assertions = true
-overflow-checks = true
-
-[profile.dev_nodebug]
-inherits = "dev"
-
-[profile.stats]
-inherits = "release"
-
-[profile.release]
-# NOTE: --enable-yjit builds use `rustc` without going through Cargo. You
-# might want to update the `rustc` invocation if you change this profile.
-opt-level = 3
-# The extra robustness that comes from checking for arithmetic overflow is
-# worth the performance cost for the compiler.
-overflow-checks = true
-# Generate debug info
-debug = true
-# Use ThinLTO. Much smaller output for a small amount of build time increase.
-lto = "thin"
+# Modify generated code for runtime checks, e.g. poison value in PC
+# for C method calls, and stack canary. This is managed separately
+# from cfg!(debug_assertions) so that we can see disasm of the code
+# that would run in the release mode.
+runtime_checks = []
+stats_allocator = []
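The `runtime_checks` and `disasm` features above are consumed on the Rust side through Cargo feature gates. A minimal sketch of that pattern, assuming a crate built with `--features runtime_checks` or `--features disasm` (the function names here are illustrative, not YJIT's):

    // Sketch of Cargo feature gating for the features declared above.
    fn maybe_emit_runtime_checks() {
        if cfg!(feature = "runtime_checks") {
            // Extra generated-code guards (e.g. a stack canary) go here,
            // independently of cfg!(debug_assertions) as the comment above notes.
        }
    }

    #[cfg(feature = "disasm")]
    fn disasm_available() -> bool {
        // Only compiled when the optional capstone dependency is pulled in
        // via the `disasm` feature.
        true
    }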
diff --git a/yjit/bindgen/Cargo.lock b/yjit/bindgen/Cargo.lock
index fc8d3927c4..a58a736599 100644
--- a/yjit/bindgen/Cargo.lock
+++ b/yjit/bindgen/Cargo.lock
@@ -1,55 +1,90 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
-version = 3
+version = 4
[[package]]
name = "aho-corasick"
-version = "0.7.20"
+version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
+name = "anstream"
+version = "0.6.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526"
+dependencies = [
+ "anstyle",
+ "anstyle-parse",
+ "anstyle-query",
+ "anstyle-wincon",
+ "colorchoice",
+ "is_terminal_polyfill",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle"
+version = "1.0.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1"
+
+[[package]]
+name = "anstyle-parse"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb"
+dependencies = [
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle-query"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a"
+dependencies = [
+ "windows-sys",
+]
+
+[[package]]
+name = "anstyle-wincon"
+version = "3.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8"
+dependencies = [
+ "anstyle",
+ "windows-sys",
+]
+
+[[package]]
name = "bindgen"
-version = "0.63.0"
+version = "0.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "36d860121800b2a9a94f9b5604b332d5cffb234ce17609ea479d723dbc9d3885"
+checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
dependencies = [
- "bitflags 1.3.2",
+ "bitflags",
"cexpr",
"clang-sys",
- "lazy_static",
- "lazycell",
+ "itertools",
"log",
- "peeking_take_while",
+ "prettyplease",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
"syn",
- "which",
]
[[package]]
name = "bitflags"
-version = "1.3.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
-
-[[package]]
-name = "bitflags"
-version = "2.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
-
-[[package]]
-name = "cc"
-version = "1.0.79"
+version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
+checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
[[package]]
name = "cexpr"
@@ -68,9 +103,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clang-sys"
-version = "1.4.0"
+version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fa2e27ae6ab525c3d369ded447057bca5438d86dc3a68f6faafb8269ba82ebf3"
+checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
dependencies = [
"glob",
"libc",
@@ -78,56 +113,45 @@ dependencies = [
]
[[package]]
-name = "either"
-version = "1.8.0"
+name = "colorchoice"
+version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797"
+checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0"
[[package]]
-name = "env_logger"
-version = "0.10.0"
+name = "either"
+version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85cdab6a89accf66733ad5a1693a4dcced6aeff64602b634530dd73c1f3ee9f0"
-dependencies = [
- "humantime",
- "is-terminal",
- "log",
- "regex",
- "termcolor",
-]
+checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
[[package]]
-name = "errno"
-version = "0.3.1"
+name = "env_filter"
+version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a"
+checksum = "4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab"
dependencies = [
- "errno-dragonfly",
- "libc",
- "windows-sys",
+ "log",
+ "regex",
]
[[package]]
-name = "errno-dragonfly"
-version = "0.1.2"
+name = "env_logger"
+version = "0.11.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
+checksum = "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d"
dependencies = [
- "cc",
- "libc",
+ "anstream",
+ "anstyle",
+ "env_filter",
+ "humantime",
+ "log",
]
[[package]]
name = "glob"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
-
-[[package]]
-name = "hermit-abi"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286"
+checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "humantime"
@@ -136,64 +160,47 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
-name = "is-terminal"
-version = "0.4.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "24fddda5af7e54bf7da53067d6e802dbcc381d0a8eef629df528e3ebf68755cb"
-dependencies = [
- "hermit-abi",
- "rustix",
- "windows-sys",
-]
-
-[[package]]
-name = "lazy_static"
-version = "1.4.0"
+name = "is_terminal_polyfill"
+version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
-name = "lazycell"
-version = "1.3.0"
+name = "itertools"
+version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
+checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
+dependencies = [
+ "either",
+]
[[package]]
name = "libc"
-version = "0.2.149"
+version = "0.2.161"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b"
+checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1"
[[package]]
name = "libloading"
-version = "0.7.4"
+version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
+checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
dependencies = [
"cfg-if",
- "winapi",
+ "windows-targets",
]
[[package]]
-name = "linux-raw-sys"
-version = "0.4.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f"
-
-[[package]]
name = "log"
-version = "0.4.17"
+version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
-dependencies = [
- "cfg-if",
-]
+checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]]
name = "memchr"
-version = "2.5.0"
+version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
+checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "minimal-lexical"
@@ -203,49 +210,59 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "nom"
-version = "7.1.1"
+version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
-name = "once_cell"
-version = "1.16.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860"
-
-[[package]]
-name = "peeking_take_while"
-version = "0.1.2"
+name = "prettyplease"
+version = "0.2.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
+checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba"
+dependencies = [
+ "proc-macro2",
+ "syn",
+]
[[package]]
name = "proc-macro2"
-version = "1.0.47"
+version = "1.0.88"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725"
+checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
-version = "1.0.21"
+version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
+checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
-version = "1.7.0"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a"
+checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3"
dependencies = [
"aho-corasick",
"memchr",
@@ -254,9 +271,9 @@ dependencies = [
[[package]]
name = "regex-syntax"
-version = "0.6.28"
+version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
+checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "rustc-hash"
@@ -265,29 +282,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
-name = "rustix"
-version = "0.38.19"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "745ecfa778e66b2b63c88a61cb36e0eea109e803b0b86bf9879fbc77c70e86ed"
-dependencies = [
- "bitflags 2.4.1",
- "errno",
- "libc",
- "linux-raw-sys",
- "windows-sys",
-]
-
-[[package]]
name = "shlex"
-version = "1.1.0"
+version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
+checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "syn"
-version = "1.0.105"
+version = "2.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "60b9b43d45702de4c839cb9b51d9f529c5dd26a4aff255b42b1ebc03e88ee908"
+checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590"
dependencies = [
"proc-macro2",
"quote",
@@ -295,80 +299,36 @@ dependencies = [
]
[[package]]
-name = "termcolor"
-version = "1.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"
-dependencies = [
- "winapi-util",
-]
-
-[[package]]
name = "unicode-ident"
-version = "1.0.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3"
-
-[[package]]
-name = "which"
-version = "4.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1c831fbbee9e129a8cf93e7747a82da9d95ba8e16621cae60ec2cdc849bacb7b"
-dependencies = [
- "either",
- "libc",
- "once_cell",
-]
-
-[[package]]
-name = "winapi"
-version = "0.3.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
-dependencies = [
- "winapi-i686-pc-windows-gnu",
- "winapi-x86_64-pc-windows-gnu",
-]
-
-[[package]]
-name = "winapi-i686-pc-windows-gnu"
-version = "0.4.0"
+version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe"
[[package]]
-name = "winapi-util"
-version = "0.1.5"
+name = "utf8parse"
+version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
-dependencies = [
- "winapi",
-]
-
-[[package]]
-name = "winapi-x86_64-pc-windows-gnu"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "windows-sys"
-version = "0.48.0"
+version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
+checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
-version = "0.48.1"
+version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
+ "windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
@@ -377,45 +337,51 @@ dependencies = [
[[package]]
name = "windows_aarch64_gnullvm"
-version = "0.48.0"
+version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
-version = "0.48.0"
+version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
-version = "0.48.0"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
-version = "0.48.0"
+version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
-version = "0.48.0"
+version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
-version = "0.48.0"
+version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
-version = "0.48.0"
+version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "yjit-bindgen"
diff --git a/yjit/bindgen/Cargo.toml b/yjit/bindgen/Cargo.toml
index a85c04cf0e..ba695e0ce6 100644
--- a/yjit/bindgen/Cargo.toml
+++ b/yjit/bindgen/Cargo.toml
@@ -6,5 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
-bindgen = "0.63.0"
-env_logger = "0.10.0"
+bindgen = "0.70.1"
+env_logger = "0.11.5"
+
+[workspace]
diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs
index 848e9fadc4..fd99d52904 100644
--- a/yjit/bindgen/src/main.rs
+++ b/yjit/bindgen/src/main.rs
@@ -38,6 +38,7 @@ fn main() {
.clang_args(filtered_clang_args)
.header("encindex.h")
.header("internal.h")
+ .header("internal/object.h")
.header("internal/re.h")
.header("include/ruby/ruby.h")
.header("shape.h")
@@ -46,6 +47,7 @@ fn main() {
// Our C file for glue code
.header(src_root.join("yjit.c").to_str().unwrap())
+ .header(src_root.join("jit.c").to_str().unwrap())
// Don't want to copy over C comment
.generate_comments(false)
@@ -60,104 +62,70 @@ fn main() {
.blocklist_type("size_t")
.blocklist_type("fpos_t")
- // Prune these types since they are system dependant and we don't use them
- .blocklist_type("__.*")
-
// Import YARV bytecode instruction constants
.allowlist_type("ruby_vminsn_type")
- // From include/ruby/internal/special_consts.h
.allowlist_type("ruby_special_consts")
-
- // From include/ruby/internal/intern/string.h
.allowlist_function("rb_utf8_str_new")
.allowlist_function("rb_str_buf_append")
.allowlist_function("rb_str_dup")
-
- // From encindex.h
.allowlist_type("ruby_preserved_encindex")
-
- // From include/ruby/ruby.h
.allowlist_function("rb_class2name")
// This struct is public to Ruby C extensions
- // From include/ruby/internal/core/rbasic.h
.allowlist_type("RBasic")
- // From include/ruby/internal/core/rstring.h
.allowlist_type("ruby_rstring_flags")
- // From internal.h
// This function prints info about a value and is useful for debugging
.allowlist_function("rb_obj_info_dump")
// For crashing
.allowlist_function("rb_bug")
- // From shape.h
- .allowlist_function("rb_shape_get_shape_id")
- .allowlist_function("rb_shape_get_shape_by_id")
+ .allowlist_function("rb_obj_shape_id")
.allowlist_function("rb_shape_id_offset")
.allowlist_function("rb_shape_get_iv_index")
- .allowlist_function("rb_shape_get_next")
- .allowlist_function("rb_shape_id")
- .allowlist_function("rb_shape_obj_too_complex")
+ .allowlist_function("rb_shape_transition_add_ivar_no_warnings")
+ .allowlist_function("rb_yjit_shape_obj_too_complex_p")
+ .allowlist_function("rb_yjit_shape_capacity")
+ .allowlist_function("rb_yjit_shape_index")
.allowlist_var("SHAPE_ID_NUM_BITS")
- .allowlist_var("OBJ_TOO_COMPLEX_SHAPE_ID")
-
- // From ruby/internal/intern/object.h
+ .allowlist_type("shape_id_mask")
+ .allowlist_function("rb_funcall")
.allowlist_function("rb_obj_is_kind_of")
.allowlist_function("rb_obj_frozen_p")
-
- // From ruby/internal/encoding/encoding.h
.allowlist_type("ruby_encoding_consts")
-
- // From include/hash.h
.allowlist_function("rb_hash_new")
-
- // From internal/hash.h
.allowlist_function("rb_hash_new_with_size")
.allowlist_function("rb_hash_resurrect")
.allowlist_function("rb_hash_stlike_foreach")
-
- // From include/ruby/st.h
+ .allowlist_function("rb_to_hash_type")
.allowlist_type("st_retval")
-
- // From include/ruby/internal/intern/hash.h
.allowlist_function("rb_hash_aset")
.allowlist_function("rb_hash_aref")
.allowlist_function("rb_hash_bulk_insert")
.allowlist_function("rb_hash_stlike_lookup")
-
- // From include/ruby/internal/intern/array.h
.allowlist_function("rb_ary_new_capa")
.allowlist_function("rb_ary_store")
.allowlist_function("rb_ary_resurrect")
+ .allowlist_function("rb_ary_cat")
.allowlist_function("rb_ary_clear")
.allowlist_function("rb_ary_dup")
.allowlist_function("rb_ary_push")
.allowlist_function("rb_ary_unshift_m")
.allowlist_function("rb_yjit_rb_ary_subseq_length")
-
- // From internal/array.h
.allowlist_function("rb_ec_ary_new_from_values")
.allowlist_function("rb_ary_tmp_new_from_values")
-
- // From include/ruby/internal/intern/class.h
.allowlist_function("rb_class_attached_object")
.allowlist_function("rb_singleton_class")
-
- // From include/ruby/internal/core/rclass.h
.allowlist_function("rb_class_get_superclass")
-
- // From include/ruby/internal/gc.h
.allowlist_function("rb_gc_mark")
.allowlist_function("rb_gc_mark_movable")
.allowlist_function("rb_gc_location")
.allowlist_function("rb_gc_writebarrier")
// VALUE variables for Ruby class objects
- // From include/ruby/internal/globals.h
.allowlist_var("rb_cBasicObject")
.allowlist_var("rb_cModule")
.allowlist_var("rb_cNilClass")
@@ -167,84 +135,61 @@ fn main() {
.allowlist_var("rb_cIO")
.allowlist_var("rb_cSymbol")
.allowlist_var("rb_cFloat")
+ .allowlist_var("rb_cNumeric")
.allowlist_var("rb_cString")
.allowlist_var("rb_cThread")
.allowlist_var("rb_cArray")
.allowlist_var("rb_cHash")
+ .allowlist_var("rb_cClass")
- // From include/ruby/internal/fl_type.h
.allowlist_type("ruby_fl_type")
.allowlist_type("ruby_fl_ushift")
-
- // From include/ruby/internal/core/robject.h
.allowlist_type("ruby_robject_flags")
-
- // From include/ruby/internal/core/rarray.h
.allowlist_type("ruby_rarray_flags")
.allowlist_type("ruby_rarray_consts")
-
- // From include/ruby/internal/core/rclass.h
.allowlist_type("ruby_rmodule_flags")
-
- // From ruby/internal/globals.h
.allowlist_var("rb_mKernel")
-
- // From vm_callinfo.h
.allowlist_type("vm_call_flag_bits")
.allowlist_type("rb_call_data")
.blocklist_type("rb_callcache.*") // Not used yet - opaque to make it easy to import rb_call_data
.opaque_type("rb_callcache.*")
- .blocklist_type("rb_callinfo_kwarg") // Contains a VALUE[] array of undefined size, so we don't import
- .opaque_type("rb_callinfo_kwarg")
.allowlist_type("rb_callinfo")
-
- // From vm_insnhelper.h
.allowlist_var("VM_ENV_DATA_INDEX_ME_CREF")
+ .allowlist_var("VM_KW_SPECIFIED_BITS_MAX")
.allowlist_var("rb_block_param_proxy")
-
- // From include/ruby/internal/intern/range.h
.allowlist_function("rb_range_new")
-
- // From include/ruby/internal/symbol.h
.allowlist_function("rb_intern")
.allowlist_function("rb_intern2")
.allowlist_function("rb_id2sym")
.allowlist_function("rb_id2name")
.allowlist_function("rb_sym2id")
.allowlist_function("rb_str_intern")
-
- // From internal/numeric.h
.allowlist_function("rb_fix_aref")
-
- // From internal/string.h
+ .allowlist_function("rb_float_plus")
+ .allowlist_function("rb_float_minus")
+ .allowlist_function("rb_float_mul")
+ .allowlist_function("rb_float_div")
+ .allowlist_type("ruby_rstring_private_flags")
.allowlist_function("rb_ec_str_resurrect")
.allowlist_function("rb_str_concat_literals")
.allowlist_function("rb_obj_as_string_result")
-
- // From include/ruby/internal/intern/parse.h
+ .allowlist_function("rb_str_byte_substr")
+ .allowlist_function("rb_str_substr_two_fixnums")
.allowlist_function("rb_backref_get")
-
- // From include/ruby/internal/intern/re.h
.allowlist_function("rb_reg_last_match")
.allowlist_function("rb_reg_match_pre")
.allowlist_function("rb_reg_match_post")
.allowlist_function("rb_reg_match_last")
.allowlist_function("rb_reg_nth_match")
-
- // From internal/re.h
.allowlist_function("rb_reg_new_ary")
// `ruby_value_type` is a C enum and this stops it from
// prefixing all the members with the name of the type
.prepend_enum_name(false)
.translate_enum_integer_types(true) // so we get fixed width Rust types for members
- // From include/ruby/internal/value_type.h
.allowlist_type("ruby_value_type") // really old C extension API
- // From include/ruby/internal/hash.h
.allowlist_type("ruby_rhash_flags") // really old C extension API
-
- // From method.h
.allowlist_type("rb_method_visibility_t")
.allowlist_type("rb_method_type_t")
.allowlist_type("method_optimized_type")
@@ -255,12 +200,13 @@ fn main() {
.blocklist_type("rb_method_cfunc_t")
.blocklist_type("rb_method_definition_.*") // Large struct with a bitfield and union of many types - don't import (yet?)
.opaque_type("rb_method_definition_.*")
-
- // From vm_core.h
+ .allowlist_function("rb_float_new")
+ .allowlist_var("rb_cRubyVM")
.allowlist_var("rb_mRubyVMFrozenCore")
.allowlist_var("VM_BLOCK_HANDLER_NONE")
.allowlist_type("vm_frame_env_flags")
.allowlist_type("rb_seq_param_keyword_struct")
+ .allowlist_type("rb_callinfo_kwarg")
.allowlist_type("ruby_basic_operators")
.allowlist_var(".*_REDEFINED_OP_FLAG")
.allowlist_type("rb_num_t")
@@ -291,87 +237,82 @@ fn main() {
.allowlist_type("ruby_tag_type")
.allowlist_type("ruby_vm_throw_flags")
.allowlist_type("vm_check_match_type")
-
- // From yjit.c
+ .allowlist_type("vm_opt_newarray_send_type")
+ .allowlist_type("rb_iseq_type")
+ .allowlist_function("rb_object_shape_count")
+ .allowlist_function("rb_ivar_get_at")
+ .allowlist_function("rb_ivar_get_at_no_ractor_check")
.allowlist_function("rb_iseq_(get|set)_yjit_payload")
.allowlist_function("rb_iseq_pc_at_idx")
.allowlist_function("rb_iseq_opcode_at_pc")
- .allowlist_function("rb_yjit_reserve_addr_space")
- .allowlist_function("rb_yjit_mark_writable")
- .allowlist_function("rb_yjit_mark_executable")
- .allowlist_function("rb_yjit_mark_unused")
- .allowlist_function("rb_yjit_get_page_size")
- .allowlist_function("rb_yjit_iseq_builtin_attrs")
+ .allowlist_function("rb_jit_reserve_addr_space")
+ .allowlist_function("rb_jit_mark_writable")
+ .allowlist_function("rb_jit_mark_executable")
+ .allowlist_function("rb_jit_mark_unused")
+ .allowlist_function("rb_jit_get_page_size")
+ .allowlist_function("rb_jit_iseq_builtin_attrs")
+ .allowlist_function("rb_yjit_iseq_inspect")
.allowlist_function("rb_yjit_builtin_function")
.allowlist_function("rb_set_cfp_(pc|sp)")
- .allowlist_function("rb_yjit_multi_ractor_p")
.allowlist_function("rb_c_method_tracing_currently_enabled")
.allowlist_function("rb_full_cfunc_return")
- .allowlist_function("rb_yjit_vm_lock_then_barrier")
- .allowlist_function("rb_yjit_vm_unlock")
.allowlist_function("rb_assert_(iseq|cme)_handle")
.allowlist_function("rb_IMEMO_TYPE_P")
+ .allowlist_function("rb_yjit_constcache_shareable")
.allowlist_function("rb_iseq_reset_jit_func")
.allowlist_function("rb_yjit_dump_iseq_loc")
- .allowlist_function("rb_yjit_for_each_iseq")
.allowlist_function("rb_yjit_obj_written")
.allowlist_function("rb_yjit_str_simple_append")
.allowlist_function("rb_RSTRING_PTR")
.allowlist_function("rb_RSTRING_LEN")
.allowlist_function("rb_ENCODING_GET")
- .allowlist_function("rb_yjit_get_proc_ptr")
+ .allowlist_function("rb_jit_get_proc_ptr")
.allowlist_function("rb_yjit_exit_locations_dict")
- .allowlist_function("rb_yjit_icache_invalidate")
+ .allowlist_function("rb_jit_icache_invalidate")
.allowlist_function("rb_optimized_call")
- .allowlist_function("rb_yjit_assert_holding_vm_lock")
.allowlist_function("rb_yjit_sendish_sp_pops")
.allowlist_function("rb_yjit_invokeblock_sp_pops")
+ .allowlist_function("rb_yjit_cme_ractor_serial")
.allowlist_function("rb_yjit_set_exception_return")
- .allowlist_type("robject_offsets")
+ .allowlist_function("rb_jit_str_concat_codepoint")
.allowlist_type("rstring_offsets")
-
- // from vm_sync.h
+ .allowlist_function("rb_assert_holding_vm_lock")
+ .allowlist_function("rb_jit_shape_too_complex_p")
+ .allowlist_function("rb_jit_multi_ractor_p")
+ .allowlist_function("rb_jit_vm_lock_then_barrier")
+ .allowlist_function("rb_jit_vm_unlock")
+ .allowlist_function("rb_jit_for_each_iseq")
+ .allowlist_type("jit_bindgen_constants")
.allowlist_function("rb_vm_barrier")
// Not sure why it's picking these up, but don't.
.blocklist_type("FILE")
.blocklist_type("_IO_.*")
- // From internal/compile.h
.allowlist_function("rb_vm_insn_decode")
-
- // from internal/cont.h
.allowlist_function("rb_jit_cont_each_iseq")
-
- // From iseq.h
.allowlist_function("rb_vm_insn_addr2opcode")
.allowlist_function("rb_iseqw_to_iseq")
.allowlist_function("rb_iseq_label")
.allowlist_function("rb_iseq_line_no")
-
- // From builtin.h
+ .allowlist_type("defined_type")
.allowlist_type("rb_builtin_function.*")
-
- // From internal/variable.h
.allowlist_function("rb_gvar_(get|set)")
.allowlist_function("rb_ensure_iv_list_size")
-
- // From include/ruby/internal/intern/variable.h
.allowlist_function("rb_attr_get")
.allowlist_function("rb_ivar_defined")
.allowlist_function("rb_ivar_get")
-
- // From internal/vm.h
- .allowlist_var("rb_vm_insns_count")
-
- // From include/ruby/internal/intern/vm.h
+ .allowlist_function("rb_mod_name")
+ .allowlist_function("rb_const_get")
+ .allowlist_var("rb_vm_insn_count")
+ .allowlist_function("rb_vm_instruction_size")
.allowlist_function("rb_get_alloc_func")
-
- // From gc.h and internal/gc.h
.allowlist_function("rb_class_allocate_instance")
+ .allowlist_function("rb_obj_equal")
+ .allowlist_function("rb_class_new_instance_pass_kw")
+ .allowlist_function("rb_obj_alloc")
.allowlist_function("rb_obj_info")
-
- // From include/ruby/debug.h
+ .allowlist_function("ruby_xfree")
.allowlist_function("rb_profile_frames")
// Functions used for code generation
@@ -398,21 +339,23 @@ fn main() {
.allowlist_function("rb_get_def_iseq_ptr")
.allowlist_function("rb_get_def_bmethod_proc")
.allowlist_function("rb_iseq_encoded_size")
- .allowlist_function("rb_get_iseq_body_total_calls")
.allowlist_function("rb_get_iseq_body_local_iseq")
.allowlist_function("rb_get_iseq_body_parent_iseq")
.allowlist_function("rb_get_iseq_body_iseq_encoded")
.allowlist_function("rb_get_iseq_body_stack_max")
+ .allowlist_function("rb_get_iseq_body_type")
.allowlist_function("rb_get_iseq_flags_has_lead")
.allowlist_function("rb_get_iseq_flags_has_opt")
.allowlist_function("rb_get_iseq_flags_has_kw")
.allowlist_function("rb_get_iseq_flags_has_rest")
.allowlist_function("rb_get_iseq_flags_has_post")
.allowlist_function("rb_get_iseq_flags_has_kwrest")
+ .allowlist_function("rb_get_iseq_flags_anon_kwrest")
.allowlist_function("rb_get_iseq_flags_has_block")
.allowlist_function("rb_get_iseq_flags_ambiguous_param0")
.allowlist_function("rb_get_iseq_flags_accepts_no_kwarg")
.allowlist_function("rb_get_iseq_flags_ruby2_keywords")
+ .allowlist_function("rb_get_iseq_flags_forwardable")
.allowlist_function("rb_get_iseq_body_local_table_size")
.allowlist_function("rb_get_iseq_body_param_keyword")
.allowlist_function("rb_get_iseq_body_param_size")
@@ -425,8 +368,9 @@ fn main() {
.allowlist_function("rb_yarv_str_eql_internal")
.allowlist_function("rb_str_neq_internal")
.allowlist_function("rb_yarv_ary_entry_internal")
- .allowlist_function("rb_yjit_fix_div_fix")
- .allowlist_function("rb_yjit_fix_mod_fix")
+ .allowlist_function("rb_yjit_ruby2_keywords_splat_p")
+ .allowlist_function("rb_jit_fix_div_fix")
+ .allowlist_function("rb_jit_fix_mod_fix")
.allowlist_function("rb_FL_TEST")
.allowlist_function("rb_FL_TEST_RAW")
.allowlist_function("rb_RB_TYPE_P")
@@ -440,16 +384,18 @@ fn main() {
.allowlist_function("rb_METHOD_ENTRY_VISI")
.allowlist_function("rb_RCLASS_ORIGIN")
.allowlist_function("rb_method_basic_definition_p")
- .allowlist_function("rb_yjit_array_len")
+ .allowlist_function("rb_jit_array_len")
.allowlist_function("rb_obj_class")
.allowlist_function("rb_obj_is_proc")
.allowlist_function("rb_vm_base_ptr")
.allowlist_function("rb_ec_stack_check")
+ .allowlist_function("rb_vm_top_self")
+ .allowlist_function("rb_yjit_splat_varg_checks")
+ .allowlist_function("rb_yjit_splat_varg_cfunc")
// We define VALUE manually, don't import it
.blocklist_type("VALUE")
- // From iseq.h
.opaque_type("rb_iseq_t")
.blocklist_type("rb_iseq_t")
diff --git a/yjit/not_gmake.mk b/yjit/not_gmake.mk
index 8bb01d65be..0d95d8ddf1 100644
--- a/yjit/not_gmake.mk
+++ b/yjit/not_gmake.mk
@@ -12,7 +12,7 @@ yjit-static-lib:
$(Q) $(RUSTC) $(YJIT_RUSTC_ARGS)
# Assume GNU flavor LD and OBJCOPY. Works on FreeBSD 13, at least.
-$(YJIT_LIBOBJ): $(YJIT_LIBS)
+$(RUST_LIBOBJ): $(YJIT_LIBS)
$(ECHO) 'partial linking $(YJIT_LIBS) into $@'
$(Q) $(LD) -r -o $@ --whole-archive $(YJIT_LIBS)
-$(Q) $(OBJCOPY) --wildcard --keep-global-symbol='$(SYMBOL_PREFIX)rb_*' $(@)
diff --git a/yjit/src/asm/arm64/arg/bitmask_imm.rs b/yjit/src/asm/arm64/arg/bitmask_imm.rs
index 6b71a73d2c..70a439afd5 100644
--- a/yjit/src/asm/arm64/arg/bitmask_imm.rs
+++ b/yjit/src/asm/arm64/arg/bitmask_imm.rs
@@ -42,7 +42,7 @@ impl TryFrom<u64> for BitmaskImmediate {
/// Attempt to convert a u64 into a BitmaskImmediate.
///
/// The implementation here is largely based on this blog post:
- /// https://dougallj.wordpress.com/2021/10/30/bit-twiddling-optimising-aarch64-logical-immediate-encoding-and-decoding/
+ /// <https://dougallj.wordpress.com/2021/10/30/bit-twiddling-optimising-aarch64-logical-immediate-encoding-and-decoding/>
fn try_from(value: u64) -> Result<Self, Self::Error> {
if value == 0 || value == u64::MAX {
return Err(());
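Only the doc link changes here, but the surrounding `TryFrom<u64>` impl is the encoder for A64 logical (bitmask) immediates. A rough usage sketch, assuming the rest of the impl accepts repeating runs of contiguous set bits as the linked post describes:

    // 0 and u64::MAX can never be encoded, as the early return above shows.
    assert!(BitmaskImmediate::try_from(0u64).is_err());
    assert!(BitmaskImmediate::try_from(u64::MAX).is_err());
    // A single run of contiguous ones (e.g. 0x0f) is the kind of pattern a
    // bitmask immediate can represent; acceptance of any particular value
    // depends on the rest of the implementation, which this hunk omits.
    let _imm = BitmaskImmediate::try_from(0x0fu64);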
diff --git a/yjit/src/asm/arm64/arg/sys_reg.rs b/yjit/src/asm/arm64/arg/sys_reg.rs
index 41d71920cb..6229d5c1fd 100644
--- a/yjit/src/asm/arm64/arg/sys_reg.rs
+++ b/yjit/src/asm/arm64/arg/sys_reg.rs
@@ -1,6 +1,6 @@
/// The encoded representation of an A64 system register.
-/// https://developer.arm.com/documentation/ddi0601/2022-06/AArch64-Registers/
+/// <https://developer.arm.com/documentation/ddi0601/2022-06/AArch64-Registers/>
pub enum SystemRegister {
- /// https://developer.arm.com/documentation/ddi0601/2022-06/AArch64-Registers/NZCV--Condition-Flags?lang=en
+ /// <https://developer.arm.com/documentation/ddi0601/2022-06/AArch64-Registers/NZCV--Condition-Flags?lang=en>
NZCV = 0b1_011_0100_0010_000
}
diff --git a/yjit/src/asm/arm64/inst/atomic.rs b/yjit/src/asm/arm64/inst/atomic.rs
index 5ce497209c..dce9affedf 100644
--- a/yjit/src/asm/arm64/inst/atomic.rs
+++ b/yjit/src/asm/arm64/inst/atomic.rs
@@ -43,13 +43,13 @@ pub struct Atomic {
impl Atomic {
/// LDADDAL
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDADD--LDADDA--LDADDAL--LDADDL--Atomic-add-on-word-or-doubleword-in-memory-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDADD--LDADDA--LDADDAL--LDADDL--Atomic-add-on-word-or-doubleword-in-memory-?lang=en>
pub fn ldaddal(rs: u8, rt: u8, rn: u8, num_bits: u8) -> Self {
Self { rt, rn, rs, size: num_bits.into() }
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en>
const FAMILY: u32 = 0b0100;
impl From<Atomic> for u32 {
diff --git a/yjit/src/asm/arm64/inst/branch.rs b/yjit/src/asm/arm64/inst/branch.rs
index f15ef2a9b0..14fcb2e9fd 100644
--- a/yjit/src/asm/arm64/inst/branch.rs
+++ b/yjit/src/asm/arm64/inst/branch.rs
@@ -28,25 +28,25 @@ pub struct Branch {
impl Branch {
/// BR
- /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/BR--Branch-to-Register-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/BR--Branch-to-Register-?lang=en>
pub fn br(rn: u8) -> Self {
Self { rn, op: Op::BR }
}
/// BLR
- /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/BLR--Branch-with-Link-to-Register-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/BLR--Branch-with-Link-to-Register-?lang=en>
pub fn blr(rn: u8) -> Self {
Self { rn, op: Op::BLR }
}
/// RET
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RET--Return-from-subroutine-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RET--Return-from-subroutine-?lang=en>
pub fn ret(rn: u8) -> Self {
Self { rn, op: Op::RET }
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en>
const FAMILY: u32 = 0b101;
impl From<Branch> for u32 {
diff --git a/yjit/src/asm/arm64/inst/branch_cond.rs b/yjit/src/asm/arm64/inst/branch_cond.rs
index fcc07f69aa..266e9ccb31 100644
--- a/yjit/src/asm/arm64/inst/branch_cond.rs
+++ b/yjit/src/asm/arm64/inst/branch_cond.rs
@@ -19,13 +19,13 @@ pub struct BranchCond {
impl BranchCond {
/// B.cond
- /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally-
+ /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally->
pub fn bcond(cond: u8, offset: InstructionOffset) -> Self {
Self { cond, offset }
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en>
const FAMILY: u32 = 0b101;
impl From<BranchCond> for u32 {
diff --git a/yjit/src/asm/arm64/inst/breakpoint.rs b/yjit/src/asm/arm64/inst/breakpoint.rs
index be4920ac76..d66a35c4c6 100644
--- a/yjit/src/asm/arm64/inst/breakpoint.rs
+++ b/yjit/src/asm/arm64/inst/breakpoint.rs
@@ -13,13 +13,13 @@ pub struct Breakpoint {
impl Breakpoint {
/// BRK
- /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/BRK--Breakpoint-instruction-
+ /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/BRK--Breakpoint-instruction->
pub fn brk(imm16: u16) -> Self {
Self { imm16 }
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en#control
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en#control>
const FAMILY: u32 = 0b101;
impl From<Breakpoint> for u32 {
diff --git a/yjit/src/asm/arm64/inst/call.rs b/yjit/src/asm/arm64/inst/call.rs
index 74debac7f7..fd26d09f8a 100644
--- a/yjit/src/asm/arm64/inst/call.rs
+++ b/yjit/src/asm/arm64/inst/call.rs
@@ -29,19 +29,19 @@ pub struct Call {
impl Call {
/// B
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch-
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch->
pub fn b(offset: InstructionOffset) -> Self {
Self { offset, op: Op::Branch }
}
/// BL
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en>
pub fn bl(offset: InstructionOffset) -> Self {
Self { offset, op: Op::BranchWithLink }
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en>
const FAMILY: u32 = 0b101;
impl From<Call> for u32 {
diff --git a/yjit/src/asm/arm64/inst/conditional.rs b/yjit/src/asm/arm64/inst/conditional.rs
index e1950e95b4..1e26c7408b 100644
--- a/yjit/src/asm/arm64/inst/conditional.rs
+++ b/yjit/src/asm/arm64/inst/conditional.rs
@@ -28,13 +28,13 @@ pub struct Conditional {
impl Conditional {
/// CSEL
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CSEL--Conditional-Select-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CSEL--Conditional-Select-?lang=en>
pub fn csel(rd: u8, rn: u8, rm: u8, cond: u8, num_bits: u8) -> Self {
Self { rd, rn, cond, rm, sf: num_bits.into() }
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en#condsel
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en#condsel>
const FAMILY: u32 = 0b101;
impl From<Conditional> for u32 {
diff --git a/yjit/src/asm/arm64/inst/data_imm.rs b/yjit/src/asm/arm64/inst/data_imm.rs
index b474b00a52..ea71705478 100644
--- a/yjit/src/asm/arm64/inst/data_imm.rs
+++ b/yjit/src/asm/arm64/inst/data_imm.rs
@@ -44,37 +44,37 @@ pub struct DataImm {
impl DataImm {
/// ADD (immediate)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--immediate---Add--immediate--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--immediate---Add--immediate--?lang=en>
pub fn add(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self {
Self { rd, rn, imm, s: S::LeaveFlags, op: Op::Add, sf: num_bits.into() }
}
/// ADDS (immediate, set flags)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--immediate---Add--immediate---setting-flags-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--immediate---Add--immediate---setting-flags-?lang=en>
pub fn adds(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self {
Self { rd, rn, imm, s: S::UpdateFlags, op: Op::Add, sf: num_bits.into() }
}
/// CMP (immediate)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--immediate---Compare--immediate---an-alias-of-SUBS--immediate--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--immediate---Compare--immediate---an-alias-of-SUBS--immediate--?lang=en>
pub fn cmp(rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self {
Self::subs(31, rn, imm, num_bits)
}
/// SUB (immediate)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--immediate---Subtract--immediate--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--immediate---Subtract--immediate--?lang=en>
pub fn sub(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self {
Self { rd, rn, imm, s: S::LeaveFlags, op: Op::Sub, sf: num_bits.into() }
}
/// SUBS (immediate, set flags)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--immediate---Subtract--immediate---setting-flags-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--immediate---Subtract--immediate---setting-flags-?lang=en>
pub fn subs(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self {
Self { rd, rn, imm, s: S::UpdateFlags, op: Op::Sub, sf: num_bits.into() }
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en>
const FAMILY: u32 = 0b1000;
impl From<DataImm> for u32 {
diff --git a/yjit/src/asm/arm64/inst/data_reg.rs b/yjit/src/asm/arm64/inst/data_reg.rs
index a742121f1f..ed4afa956b 100644
--- a/yjit/src/asm/arm64/inst/data_reg.rs
+++ b/yjit/src/asm/arm64/inst/data_reg.rs
@@ -57,7 +57,7 @@ pub struct DataReg {
impl DataReg {
/// ADD (shifted register)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--shifted-register---Add--shifted-register--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--shifted-register---Add--shifted-register--?lang=en>
pub fn add(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
Self {
rd,
@@ -72,7 +72,7 @@ impl DataReg {
}
/// ADDS (shifted register, set flags)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--shifted-register---Add--shifted-register---setting-flags-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--shifted-register---Add--shifted-register---setting-flags-?lang=en>
pub fn adds(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
Self {
rd,
@@ -87,13 +87,13 @@ impl DataReg {
}
/// CMP (shifted register)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--shifted-register---Compare--shifted-register---an-alias-of-SUBS--shifted-register--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--shifted-register---Compare--shifted-register---an-alias-of-SUBS--shifted-register--?lang=en>
pub fn cmp(rn: u8, rm: u8, num_bits: u8) -> Self {
Self::subs(31, rn, rm, num_bits)
}
/// SUB (shifted register)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--shifted-register---Subtract--shifted-register--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--shifted-register---Subtract--shifted-register--?lang=en>
pub fn sub(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
Self {
rd,
@@ -108,7 +108,7 @@ impl DataReg {
}
/// SUBS (shifted register, set flags)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--shifted-register---Subtract--shifted-register---setting-flags-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--shifted-register---Subtract--shifted-register---setting-flags-?lang=en>
pub fn subs(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
Self {
rd,
@@ -123,7 +123,7 @@ impl DataReg {
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en>
const FAMILY: u32 = 0b0101;
impl From<DataReg> for u32 {
diff --git a/yjit/src/asm/arm64/inst/halfword_imm.rs b/yjit/src/asm/arm64/inst/halfword_imm.rs
index 0ddae8e8de..863ac947dd 100644
--- a/yjit/src/asm/arm64/inst/halfword_imm.rs
+++ b/yjit/src/asm/arm64/inst/halfword_imm.rs
@@ -53,43 +53,43 @@ pub struct HalfwordImm {
impl HalfwordImm {
/// LDRH
- /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--
+ /// <https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-->
pub fn ldrh(rt: u8, rn: u8, imm12: i16) -> Self {
Self { rt, rn, index: Index::None, imm: imm12, op: Op::Load }
}
/// LDRH (pre-index)
- /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--
+ /// <https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-->
pub fn ldrh_pre(rt: u8, rn: u8, imm9: i16) -> Self {
Self { rt, rn, index: Index::PreIndex, imm: imm9, op: Op::Load }
}
/// LDRH (post-index)
- /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--
+ /// <https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-->
pub fn ldrh_post(rt: u8, rn: u8, imm9: i16) -> Self {
Self { rt, rn, index: Index::PostIndex, imm: imm9, op: Op::Load }
}
/// STRH
- /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate--
+ /// <https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate-->
pub fn strh(rt: u8, rn: u8, imm12: i16) -> Self {
Self { rt, rn, index: Index::None, imm: imm12, op: Op::Store }
}
/// STRH (pre-index)
- /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate--
+ /// <https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate-->
pub fn strh_pre(rt: u8, rn: u8, imm9: i16) -> Self {
Self { rt, rn, index: Index::PreIndex, imm: imm9, op: Op::Store }
}
/// STRH (post-index)
- /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate--
+ /// <https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate-->
pub fn strh_post(rt: u8, rn: u8, imm9: i16) -> Self {
Self { rt, rn, index: Index::PostIndex, imm: imm9, op: Op::Store }
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en>
const FAMILY: u32 = 0b111100;
impl From<HalfwordImm> for u32 {
diff --git a/yjit/src/asm/arm64/inst/load_literal.rs b/yjit/src/asm/arm64/inst/load_literal.rs
index 3eade205c8..817e893553 100644
--- a/yjit/src/asm/arm64/inst/load_literal.rs
+++ b/yjit/src/asm/arm64/inst/load_literal.rs
@@ -40,13 +40,13 @@ pub struct LoadLiteral {
impl LoadLiteral {
/// LDR (load literal)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--literal---Load-Register--literal--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--literal---Load-Register--literal--?lang=en>
pub fn ldr_literal(rt: u8, offset: InstructionOffset, num_bits: u8) -> Self {
Self { rt, offset, opc: num_bits.into() }
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en>
const FAMILY: u32 = 0b0100;
impl From<LoadLiteral> for u32 {
diff --git a/yjit/src/asm/arm64/inst/load_register.rs b/yjit/src/asm/arm64/inst/load_register.rs
index 3426b9ba5f..3d94e8da1f 100644
--- a/yjit/src/asm/arm64/inst/load_register.rs
+++ b/yjit/src/asm/arm64/inst/load_register.rs
@@ -61,13 +61,13 @@ pub struct LoadRegister {
impl LoadRegister {
/// LDR
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--register---Load-Register--register--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--register---Load-Register--register--?lang=en>
pub fn ldr(rt: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
Self { rt, rn, s: S::NoShift, option: Option::LSL, rm, size: num_bits.into() }
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en>
const FAMILY: u32 = 0b0100;
impl From<LoadRegister> for u32 {
diff --git a/yjit/src/asm/arm64/inst/load_store.rs b/yjit/src/asm/arm64/inst/load_store.rs
index b5c8a3c294..e27909ae35 100644
--- a/yjit/src/asm/arm64/inst/load_store.rs
+++ b/yjit/src/asm/arm64/inst/load_store.rs
@@ -66,67 +66,67 @@ pub struct LoadStore {
impl LoadStore {
/// LDR (immediate, post-index)
- /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
+ /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate-->
pub fn ldr_post(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self {
Self { rt, rn, idx: Index::PostIndex, imm9, opc: Opc::LDR, size: num_bits.into() }
}
/// LDR (immediate, pre-index)
- /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
+ /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate-->
pub fn ldr_pre(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self {
Self { rt, rn, idx: Index::PreIndex, imm9, opc: Opc::LDR, size: num_bits.into() }
}
/// LDUR (load register, unscaled)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled--?lang=en>
pub fn ldur(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self {
Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDR, size: num_bits.into() }
}
/// LDURH Load Register Halfword (unscaled)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURH--Load-Register-Halfword--unscaled--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURH--Load-Register-Halfword--unscaled--?lang=en>
pub fn ldurh(rt: u8, rn: u8, imm9: i16) -> Self {
Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDR, size: Size::Size16 }
}
/// LDURB (load register, byte, unscaled)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURB--Load-Register-Byte--unscaled--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURB--Load-Register-Byte--unscaled--?lang=en>
pub fn ldurb(rt: u8, rn: u8, imm9: i16) -> Self {
Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDR, size: Size::Size8 }
}
/// LDURSW (load register, unscaled, signed)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURSW--Load-Register-Signed-Word--unscaled--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURSW--Load-Register-Signed-Word--unscaled--?lang=en>
pub fn ldursw(rt: u8, rn: u8, imm9: i16) -> Self {
Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDURSW, size: Size::Size32 }
}
/// STR (immediate, post-index)
- /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STR--immediate---Store-Register--immediate--
+ /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STR--immediate---Store-Register--immediate-->
pub fn str_post(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self {
Self { rt, rn, idx: Index::PostIndex, imm9, opc: Opc::STR, size: num_bits.into() }
}
/// STR (immediate, pre-index)
- /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STR--immediate---Store-Register--immediate--
+ /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STR--immediate---Store-Register--immediate-->
pub fn str_pre(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self {
Self { rt, rn, idx: Index::PreIndex, imm9, opc: Opc::STR, size: num_bits.into() }
}
/// STUR (store register, unscaled)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STUR--Store-Register--unscaled--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STUR--Store-Register--unscaled--?lang=en>
pub fn stur(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self {
Self { rt, rn, idx: Index::None, imm9, opc: Opc::STR, size: num_bits.into() }
}
/// STURH (store register, halfword, unscaled)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STURH--Store-Register-Halfword--unscaled--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STURH--Store-Register-Halfword--unscaled--?lang=en>
pub fn sturh(rt: u8, rn: u8, imm9: i16) -> Self {
Self { rt, rn, idx: Index::None, imm9, opc: Opc::STR, size: Size::Size16 }
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en>
const FAMILY: u32 = 0b0100;
impl From<LoadStore> for u32 {
diff --git a/yjit/src/asm/arm64/inst/load_store_exclusive.rs b/yjit/src/asm/arm64/inst/load_store_exclusive.rs
index 8216c2200a..1106b4cb37 100644
--- a/yjit/src/asm/arm64/inst/load_store_exclusive.rs
+++ b/yjit/src/asm/arm64/inst/load_store_exclusive.rs
@@ -52,19 +52,19 @@ pub struct LoadStoreExclusive {
impl LoadStoreExclusive {
/// LDAXR
- /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/LDAXR--Load-Acquire-Exclusive-Register-
+ /// <https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/LDAXR--Load-Acquire-Exclusive-Register->
pub fn ldaxr(rt: u8, rn: u8, num_bits: u8) -> Self {
Self { rt, rn, rs: 31, op: Op::Load, size: num_bits.into() }
}
/// STLXR
- /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/STLXR--Store-Release-Exclusive-Register-
+ /// <https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/STLXR--Store-Release-Exclusive-Register->
pub fn stlxr(rs: u8, rt: u8, rn: u8, num_bits: u8) -> Self {
Self { rt, rn, rs, op: Op::Store, size: num_bits.into() }
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en>
const FAMILY: u32 = 0b0100;
impl From<LoadStoreExclusive> for u32 {
diff --git a/yjit/src/asm/arm64/inst/logical_imm.rs b/yjit/src/asm/arm64/inst/logical_imm.rs
index b24916f8a5..d57ad5f5b7 100644
--- a/yjit/src/asm/arm64/inst/logical_imm.rs
+++ b/yjit/src/asm/arm64/inst/logical_imm.rs
@@ -44,43 +44,43 @@ pub struct LogicalImm {
impl LogicalImm {
/// AND (bitmask immediate)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--immediate---Bitwise-AND--immediate--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--immediate---Bitwise-AND--immediate--?lang=en>
pub fn and(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self {
Self { rd, rn, imm, opc: Opc::And, sf: num_bits.into() }
}
/// ANDS (bitmask immediate)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--immediate---Bitwise-AND--immediate---setting-flags-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--immediate---Bitwise-AND--immediate---setting-flags-?lang=en>
pub fn ands(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self {
Self { rd, rn, imm, opc: Opc::Ands, sf: num_bits.into() }
}
/// EOR (bitmask immediate)
- /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/EOR--immediate---Bitwise-Exclusive-OR--immediate--
+ /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/EOR--immediate---Bitwise-Exclusive-OR--immediate-->
pub fn eor(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self {
Self { rd, rn, imm, opc: Opc::Eor, sf: num_bits.into() }
}
/// MOV (bitmask immediate)
- /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--bitmask-immediate---Move--bitmask-immediate---an-alias-of-ORR--immediate--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--bitmask-immediate---Move--bitmask-immediate---an-alias-of-ORR--immediate--?lang=en>
pub fn mov(rd: u8, imm: BitmaskImmediate, num_bits: u8) -> Self {
Self { rd, rn: 0b11111, imm, opc: Opc::Orr, sf: num_bits.into() }
}
/// ORR (bitmask immediate)
- /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--immediate---Bitwise-OR--immediate--
+ /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--immediate---Bitwise-OR--immediate-->
pub fn orr(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self {
Self { rd, rn, imm, opc: Opc::Orr, sf: num_bits.into() }
}
/// TST (bitmask immediate)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--immediate---Test-bits--immediate---an-alias-of-ANDS--immediate--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--immediate---Test-bits--immediate---an-alias-of-ANDS--immediate--?lang=en>
pub fn tst(rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self {
Self::ands(31, rn, imm, num_bits)
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#log_imm
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#log_imm>
const FAMILY: u32 = 0b1001;
impl From<LogicalImm> for u32 {
diff --git a/yjit/src/asm/arm64/inst/logical_reg.rs b/yjit/src/asm/arm64/inst/logical_reg.rs
index a96805c9f9..18edff606f 100644
--- a/yjit/src/asm/arm64/inst/logical_reg.rs
+++ b/yjit/src/asm/arm64/inst/logical_reg.rs
@@ -70,55 +70,55 @@ pub struct LogicalReg {
impl LogicalReg {
/// AND (shifted register)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--shifted-register---Bitwise-AND--shifted-register--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--shifted-register---Bitwise-AND--shifted-register--?lang=en>
pub fn and(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::And, sf: num_bits.into() }
}
/// ANDS (shifted register)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--shifted-register---Bitwise-AND--shifted-register---setting-flags-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--shifted-register---Bitwise-AND--shifted-register---setting-flags-?lang=en>
pub fn ands(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() }
}
/// EOR (shifted register)
- /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/EOR--shifted-register---Bitwise-Exclusive-OR--shifted-register--
+ /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/EOR--shifted-register---Bitwise-Exclusive-OR--shifted-register-->
pub fn eor(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Eor, sf: num_bits.into() }
}
/// MOV (register)
- /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register--?lang=en>
pub fn mov(rd: u8, rm: u8, num_bits: u8) -> Self {
Self { rd, rn: 0b11111, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() }
}
/// MVN (shifted register)
- /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MVN--Bitwise-NOT--an-alias-of-ORN--shifted-register--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MVN--Bitwise-NOT--an-alias-of-ORN--shifted-register--?lang=en>
pub fn mvn(rd: u8, rm: u8, num_bits: u8) -> Self {
Self { rd, rn: 0b11111, imm6: 0, rm, n: N::Yes, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() }
}
/// ORN (shifted register)
- /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORN--shifted-register---Bitwise-OR-NOT--shifted-register--
+ /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORN--shifted-register---Bitwise-OR-NOT--shifted-register-->
pub fn orn(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
Self { rd, rn, imm6: 0, rm, n: N::Yes, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() }
}
/// ORR (shifted register)
- /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--shifted-register---Bitwise-OR--shifted-register--
+ /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--shifted-register---Bitwise-OR--shifted-register-->
pub fn orr(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() }
}
/// TST (shifted register)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--shifted-register---Test--shifted-register---an-alias-of-ANDS--shifted-register--?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--shifted-register---Test--shifted-register---an-alias-of-ANDS--shifted-register--?lang=en>
pub fn tst(rn: u8, rm: u8, num_bits: u8) -> Self {
Self { rd: 31, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() }
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en>
const FAMILY: u32 = 0b0101;
impl From<LogicalReg> for u32 {
diff --git a/yjit/src/asm/arm64/inst/madd.rs b/yjit/src/asm/arm64/inst/madd.rs
index 683e643189..71f2ab230a 100644
--- a/yjit/src/asm/arm64/inst/madd.rs
+++ b/yjit/src/asm/arm64/inst/madd.rs
@@ -28,7 +28,7 @@ pub struct MAdd {
impl MAdd {
/// MUL
- /// https://developer.arm.com/documentation/ddi0602/2023-06/Base-Instructions/MUL--Multiply--an-alias-of-MADD-
+ /// <https://developer.arm.com/documentation/ddi0602/2023-06/Base-Instructions/MUL--Multiply--an-alias-of-MADD->
pub fn mul(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
Self { rd, rn, ra: 0b11111, rm, sf: num_bits.into() }
}
diff --git a/yjit/src/asm/arm64/inst/mov.rs b/yjit/src/asm/arm64/inst/mov.rs
index e7cb9215b0..eae4565c3a 100644
--- a/yjit/src/asm/arm64/inst/mov.rs
+++ b/yjit/src/asm/arm64/inst/mov.rs
@@ -56,19 +56,19 @@ pub struct Mov {
impl Mov {
/// MOVK
- /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MOVK--Move-wide-with-keep-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MOVK--Move-wide-with-keep-?lang=en>
pub fn movk(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self {
Self { rd, imm16, hw: hw.into(), op: Op::MOVK, sf: num_bits.into() }
}
/// MOVZ
- /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MOVZ--Move-wide-with-zero-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MOVZ--Move-wide-with-zero-?lang=en>
pub fn movz(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self {
Self { rd, imm16, hw: hw.into(), op: Op::MOVZ, sf: num_bits.into() }
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en>
const FAMILY: u32 = 0b1000;
impl From<Mov> for u32 {
diff --git a/yjit/src/asm/arm64/inst/nop.rs b/yjit/src/asm/arm64/inst/nop.rs
index d58b3574a9..081d8558f5 100644
--- a/yjit/src/asm/arm64/inst/nop.rs
+++ b/yjit/src/asm/arm64/inst/nop.rs
@@ -10,7 +10,7 @@ pub struct Nop;
impl Nop {
/// NOP
- /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/NOP--No-Operation-
+ /// <https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/NOP--No-Operation->
pub fn nop() -> Self {
Self {}
}
diff --git a/yjit/src/asm/arm64/inst/pc_rel.rs b/yjit/src/asm/arm64/inst/pc_rel.rs
index bd1a2b9367..2ea586a778 100644
--- a/yjit/src/asm/arm64/inst/pc_rel.rs
+++ b/yjit/src/asm/arm64/inst/pc_rel.rs
@@ -30,19 +30,19 @@ pub struct PCRelative {
impl PCRelative {
/// ADR
- /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/ADR--Form-PC-relative-address-
+ /// <https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/ADR--Form-PC-relative-address->
pub fn adr(rd: u8, imm: i32) -> Self {
Self { rd, imm, op: Op::ADR }
}
/// ADRP
- /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/ADRP--Form-PC-relative-address-to-4KB-page-
+ /// <https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/ADRP--Form-PC-relative-address-to-4KB-page->
pub fn adrp(rd: u8, imm: i32) -> Self {
Self { rd, imm: imm >> 12, op: Op::ADRP }
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en>
const FAMILY: u32 = 0b1000;
impl From<PCRelative> for u32 {
diff --git a/yjit/src/asm/arm64/inst/reg_pair.rs b/yjit/src/asm/arm64/inst/reg_pair.rs
index 87690e3b4a..9bffcd8479 100644
--- a/yjit/src/asm/arm64/inst/reg_pair.rs
+++ b/yjit/src/asm/arm64/inst/reg_pair.rs
@@ -68,49 +68,49 @@ impl RegisterPair {
}
/// LDP (signed offset)
- /// LDP <Xt1>, <Xt2>, [<Xn|SP>{, #<imm>}]
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en
+ /// `LDP <Xt1>, <Xt2>, [<Xn|SP>{, #<imm>}]`
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en>
pub fn ldp(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self {
Self::new(rt1, rt2, rn, disp, Index::LoadSignedOffset, num_bits)
}
/// LDP (pre-index)
- /// LDP <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]!
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en
+ /// `LDP <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]!`
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en>
pub fn ldp_pre(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self {
Self::new(rt1, rt2, rn, disp, Index::LoadPreIndex, num_bits)
}
/// LDP (post-index)
- /// LDP <Xt1>, <Xt2>, [<Xn|SP>], #<imm>
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en
+ /// `LDP <Xt1>, <Xt2>, [<Xn|SP>], #<imm>`
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en>
pub fn ldp_post(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self {
Self::new(rt1, rt2, rn, disp, Index::LoadPostIndex, num_bits)
}
/// STP (signed offset)
- /// STP <Xt1>, <Xt2>, [<Xn|SP>{, #<imm>}]
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en
+ /// `STP <Xt1>, <Xt2>, [<Xn|SP>{, #<imm>}]`
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en>
pub fn stp(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self {
Self::new(rt1, rt2, rn, disp, Index::StoreSignedOffset, num_bits)
}
/// STP (pre-index)
- /// STP <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]!
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en
+ /// `STP <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]!`
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en>
pub fn stp_pre(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self {
Self::new(rt1, rt2, rn, disp, Index::StorePreIndex, num_bits)
}
/// STP (post-index)
- /// STP <Xt1>, <Xt2>, [<Xn|SP>], #<imm>
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en
+ /// `STP <Xt1>, <Xt2>, [<Xn|SP>], #<imm>`
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en>
pub fn stp_post(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self {
Self::new(rt1, rt2, rn, disp, Index::StorePostIndex, num_bits)
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en>
const FAMILY: u32 = 0b0100;
impl From<RegisterPair> for u32 {
diff --git a/yjit/src/asm/arm64/inst/sbfm.rs b/yjit/src/asm/arm64/inst/sbfm.rs
index 8602998980..12944ba722 100644
--- a/yjit/src/asm/arm64/inst/sbfm.rs
+++ b/yjit/src/asm/arm64/inst/sbfm.rs
@@ -32,7 +32,7 @@ pub struct SBFM {
impl SBFM {
/// ASR
- /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ASR--immediate---Arithmetic-Shift-Right--immediate---an-alias-of-SBFM-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ASR--immediate---Arithmetic-Shift-Right--immediate---an-alias-of-SBFM-?lang=en>
pub fn asr(rd: u8, rn: u8, shift: u8, num_bits: u8) -> Self {
let (imms, n) = if num_bits == 64 {
(0b111111, true)
@@ -44,13 +44,13 @@ impl SBFM {
}
/// SXTW
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SXTW--Sign-Extend-Word--an-alias-of-SBFM-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SXTW--Sign-Extend-Word--an-alias-of-SBFM-?lang=en>
pub fn sxtw(rd: u8, rn: u8) -> Self {
Self { rd, rn, immr: 0, imms: 31, n: true, sf: Sf::Sf64 }
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#bitfield
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#bitfield>
const FAMILY: u32 = 0b1001;
impl From<SBFM> for u32 {
diff --git a/yjit/src/asm/arm64/inst/shift_imm.rs b/yjit/src/asm/arm64/inst/shift_imm.rs
index 3d2685a997..9dac9a1408 100644
--- a/yjit/src/asm/arm64/inst/shift_imm.rs
+++ b/yjit/src/asm/arm64/inst/shift_imm.rs
@@ -38,13 +38,13 @@ pub struct ShiftImm {
impl ShiftImm {
/// LSL (immediate)
- /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LSL--immediate---Logical-Shift-Left--immediate---an-alias-of-UBFM-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LSL--immediate---Logical-Shift-Left--immediate---an-alias-of-UBFM-?lang=en>
pub fn lsl(rd: u8, rn: u8, shift: u8, num_bits: u8) -> Self {
ShiftImm { rd, rn, shift, opc: Opc::LSL, sf: num_bits.into() }
}
/// LSR (immediate)
- /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/LSR--immediate---Logical-Shift-Right--immediate---an-alias-of-UBFM-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/LSR--immediate---Logical-Shift-Right--immediate---an-alias-of-UBFM-?lang=en>
pub fn lsr(rd: u8, rn: u8, shift: u8, num_bits: u8) -> Self {
ShiftImm { rd, rn, shift, opc: Opc::LSR, sf: num_bits.into() }
}
@@ -85,7 +85,7 @@ impl ShiftImm {
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#bitfield
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#bitfield>
const FAMILY: u32 = 0b10011;
impl From<ShiftImm> for u32 {
diff --git a/yjit/src/asm/arm64/inst/smulh.rs b/yjit/src/asm/arm64/inst/smulh.rs
index 5e9b231fde..f355cb6531 100644
--- a/yjit/src/asm/arm64/inst/smulh.rs
+++ b/yjit/src/asm/arm64/inst/smulh.rs
@@ -22,7 +22,7 @@ pub struct SMulH {
impl SMulH {
/// SMULH
- /// https://developer.arm.com/documentation/ddi0602/2023-06/Base-Instructions/SMULH--Signed-Multiply-High-
+ /// <https://developer.arm.com/documentation/ddi0602/2023-06/Base-Instructions/SMULH--Signed-Multiply-High->
pub fn smulh(rd: u8, rn: u8, rm: u8) -> Self {
Self { rd, rn, ra: 0b11111, rm }
}
diff --git a/yjit/src/asm/arm64/inst/sys_reg.rs b/yjit/src/asm/arm64/inst/sys_reg.rs
index 108737a870..7191dfbfd9 100644
--- a/yjit/src/asm/arm64/inst/sys_reg.rs
+++ b/yjit/src/asm/arm64/inst/sys_reg.rs
@@ -32,19 +32,19 @@ pub struct SysReg {
impl SysReg {
/// MRS (register)
- /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MRS--Move-System-Register-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MRS--Move-System-Register-?lang=en>
pub fn mrs(rt: u8, systemreg: SystemRegister) -> Self {
SysReg { rt, systemreg, l: L::MRS }
}
/// MSR (register)
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MSR--register---Move-general-purpose-register-to-System-Register-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MSR--register---Move-general-purpose-register-to-System-Register-?lang=en>
pub fn msr(systemreg: SystemRegister, rt: u8) -> Self {
SysReg { rt, systemreg, l: L::MSR }
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en#systemmove
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en#systemmove>
const FAMILY: u32 = 0b110101010001;
impl From<SysReg> for u32 {
diff --git a/yjit/src/asm/arm64/inst/test_bit.rs b/yjit/src/asm/arm64/inst/test_bit.rs
index c57a05ad2b..f7aeca70fd 100644
--- a/yjit/src/asm/arm64/inst/test_bit.rs
+++ b/yjit/src/asm/arm64/inst/test_bit.rs
@@ -60,19 +60,19 @@ pub struct TestBit {
impl TestBit {
/// TBNZ
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TBNZ--Test-bit-and-Branch-if-Nonzero-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TBNZ--Test-bit-and-Branch-if-Nonzero-?lang=en>
pub fn tbnz(rt: u8, bit_num: u8, offset: i16) -> Self {
Self { rt, imm14: offset, b40: bit_num & 0b11111, op: Op::TBNZ, b5: bit_num.into() }
}
/// TBZ
- /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TBZ--Test-bit-and-Branch-if-Zero-?lang=en
+ /// <https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TBZ--Test-bit-and-Branch-if-Zero-?lang=en>
pub fn tbz(rt: u8, bit_num: u8, offset: i16) -> Self {
Self { rt, imm14: offset, b40: bit_num & 0b11111, op: Op::TBZ, b5: bit_num.into() }
}
}
-/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en
+/// <https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en>
const FAMILY: u32 = 0b11011;
impl From<TestBit> for u32 {
diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs
index eb99c00ba7..18b5270f9d 100644
--- a/yjit/src/asm/arm64/mod.rs
+++ b/yjit/src/asm/arm64/mod.rs
@@ -215,6 +215,9 @@ pub const fn bcond_offset_fits_bits(offset: i64) -> bool {
imm_fits_bits(offset, 19)
}
+/// CBZ and CBNZ also have a limit of 19 bits for the branch offset.
+pub use bcond_offset_fits_bits as cmp_branch_offset_fits_bits;
+
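For scale, the reused 19-bit check (imm_fits_bits(offset, 19), as in bcond_offset_fits_bits above) bounds the branch offset to -2^18..=2^18-1 instructions, i.e. roughly +/-1 MiB of code. A minimal sketch, assuming imm_fits_bits keeps its signed-range semantics:

    assert!(imm_fits_bits((1_i64 << 18) - 1, 19));   // largest forward reach, counted in instructions
    assert!(!imm_fits_bits(1_i64 << 18, 19));        // one past the limit no longer fits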
/// B.cond - branch to target if condition is true
pub fn bcond(cb: &mut CodeBlock, cond: u8, offset: InstructionOffset) {
assert!(bcond_offset_fits_bits(offset.into()), "The offset must be 19 bits or less.");
@@ -254,7 +257,7 @@ pub fn br(cb: &mut CodeBlock, rn: A64Opnd) {
/// BRK - create a breakpoint
pub fn brk(cb: &mut CodeBlock, imm16: A64Opnd) {
let bytes: [u8; 4] = match imm16 {
- A64Opnd::None => Breakpoint::brk(0).into(),
+ A64Opnd::None => Breakpoint::brk(0xf000).into(),
A64Opnd::UImm(imm16) => {
assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less.");
Breakpoint::brk(imm16 as u16).into()
@@ -276,6 +279,9 @@ pub fn cmp(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) {
DataReg::cmp(rn.reg_no, rm.reg_no, rn.num_bits).into()
},
+ (A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => {
+ DataImm::cmp(rn.reg_no, (imm12 as u64).try_into().unwrap(), rn.num_bits).into()
+ },
(A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => {
DataImm::cmp(rn.reg_no, imm12.try_into().unwrap(), rn.num_bits).into()
},
@@ -1093,6 +1099,48 @@ pub fn tst(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) {
cb.write_bytes(&bytes);
}
+/// CBZ - branch if a register is zero
+pub fn cbz(cb: &mut CodeBlock, rt: A64Opnd, offset: InstructionOffset) {
+ assert!(imm_fits_bits(offset.into(), 19), "jump offset for cbz must fit in 19 bits");
+ let bytes: [u8; 4] = if let A64Opnd::Reg(rt) = rt {
+ cbz_cbnz(rt.num_bits, false, offset, rt.reg_no)
+ } else {
+ panic!("Invalid operand combination to cbz instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// CBNZ - branch if a register is non-zero
+pub fn cbnz(cb: &mut CodeBlock, rt: A64Opnd, offset: InstructionOffset) {
+    assert!(imm_fits_bits(offset.into(), 19), "jump offset for cbnz must fit in 19 bits");
+ let bytes: [u8; 4] = if let A64Opnd::Reg(rt) = rt {
+ cbz_cbnz(rt.num_bits, true, offset, rt.reg_no)
+ } else {
+ panic!("Invalid operand combination to cbnz instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// Encode Compare and Branch on Zero (CBZ) with `op=0` or Compare and Branch on Nonzero (CBNZ)
+/// with `op=1`.
+///
+/// <https://developer.arm.com/documentation/ddi0602/2024-03/Base-Instructions/CBZ--Compare-and-Branch-on-Zero->
+///
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | sf  0  1  1    0  1  0 op                                                                                     |
+/// |                             imm19........................................................... Rt.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+fn cbz_cbnz(num_bits: u8, op: bool, offset: InstructionOffset, rt: u8) -> [u8; 4] {
+ ((Sf::from(num_bits) as u32) << 31 |
+ 0b11010 << 25 |
+ u32::from(op) << 24 |
+ truncate_imm::<_, 19>(offset) << 5 |
+ rt as u32).to_le_bytes()
+}
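As a cross-check against test_cbz further down, here is the word this helper should produce for `cbz x0, #-1` (a hand-worked illustration, not part of the change itself):

    // sf=1 (64-bit), fixed bits 0b11010, op=0 (CBZ), imm19 = -1 truncated to 19 bits, rt = 0 (x0)
    let imm19 = (-1_i32 as u32) & 0x7ffff;                              // 0x7ffff
    let word = (1_u32 << 31) | (0b11010 << 25) | (0 << 24) | (imm19 << 5) | 0;
    assert_eq!(word, 0xb4ff_ffe0);                                      // little-endian bytes: e0 ff ff b4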
+
#[cfg(test)]
mod tests {
use super::*;
@@ -1268,8 +1316,26 @@ mod tests {
}
#[test]
+ fn test_cbz() {
+ let offset = InstructionOffset::from_insns(-1);
+ check_bytes("e0ffffb4e0ffff34", |cb| {
+ cbz(cb, X0, offset);
+ cbz(cb, W0, offset);
+ });
+ }
+
+ #[test]
+ fn test_cbnz() {
+ let offset = InstructionOffset::from_insns(2);
+ check_bytes("540000b554000035", |cb| {
+ cbnz(cb, X20, offset);
+ cbnz(cb, W20, offset);
+ });
+ }
+
+ #[test]
fn test_brk_none() {
- check_bytes("000020d4", |cb| brk(cb, A64Opnd::None));
+ check_bytes("00003ed4", |cb| brk(cb, A64Opnd::None));
}
#[test]
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
index 524d6341f5..9ef675b34d 100644
--- a/yjit/src/asm/mod.rs
+++ b/yjit/src/asm/mod.rs
@@ -1,17 +1,14 @@
-use std::cell::RefCell;
use std::fmt;
use std::mem;
use std::rc::Rc;
+use std::collections::BTreeMap;
+
use crate::core::IseqPayload;
use crate::core::for_each_off_stack_iseq_payload;
use crate::core::for_each_on_stack_iseq_payload;
use crate::invariants::rb_yjit_tracing_invalidate_all;
use crate::stats::incr_counter;
use crate::virtualmem::WriteError;
-
-#[cfg(feature = "disasm")]
-use std::collections::BTreeMap;
-
use crate::codegen::CodegenGlobals;
use crate::virtualmem::{VirtualMem, CodePtr};
@@ -46,7 +43,7 @@ pub struct LabelRef {
/// Block of memory into which instructions can be assembled
pub struct CodeBlock {
// Memory for storing the encoded instructions
- mem_block: Rc<RefCell<VirtualMem>>,
+ mem_block: Rc<VirtualMem>,
// Size of a code page in bytes. Each code page is split into an inlined and an outlined portion.
// Code GC collects code memory at this granularity.
@@ -77,8 +74,10 @@ pub struct CodeBlock {
// References to labels
label_refs: Vec<LabelRef>,
+ // A switch for keeping comments. They take up memory.
+ keep_comments: bool,
+
// Comments for assembly instructions, if that feature is enabled
- #[cfg(feature = "disasm")]
asm_comments: BTreeMap<usize, Vec<String>>,
// True for OutlinedCb
@@ -107,16 +106,16 @@ impl CodeBlock {
const PREFERRED_CODE_PAGE_SIZE: usize = 16 * 1024;
/// Make a new CodeBlock
- pub fn new(mem_block: Rc<RefCell<VirtualMem>>, outlined: bool, freed_pages: Rc<Option<Vec<usize>>>) -> Self {
+ pub fn new(mem_block: Rc<VirtualMem>, outlined: bool, freed_pages: Rc<Option<Vec<usize>>>, keep_comments: bool) -> Self {
// Pick the code page size
- let system_page_size = mem_block.borrow().system_page_size();
+ let system_page_size = mem_block.system_page_size();
let page_size = if 0 == Self::PREFERRED_CODE_PAGE_SIZE % system_page_size {
Self::PREFERRED_CODE_PAGE_SIZE
} else {
system_page_size
};
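For example, with 4 KiB system pages (the common Linux case) the preferred 16 KiB size divides evenly and is chosen, while a hypothetical system with 64 KiB pages would fall back to 64 KiB code pages.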
- let mem_size = mem_block.borrow().virtual_region_size();
+ let mem_size = mem_block.virtual_region_size();
let mut cb = Self {
mem_block,
mem_size,
@@ -128,7 +127,7 @@ impl CodeBlock {
label_addrs: Vec::new(),
label_names: Vec::new(),
label_refs: Vec::new(),
- #[cfg(feature = "disasm")]
+ keep_comments,
asm_comments: BTreeMap::new(),
outlined,
dropped_bytes: false,
@@ -145,6 +144,7 @@ impl CodeBlock {
/// Move the CodeBlock to the next page. If it's on the furthest page,
/// move the other CodeBlock to the next page as well.
+ #[must_use]
pub fn next_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, base_ptr: CodePtr, jmp_ptr: F) -> bool {
let old_write_ptr = self.get_write_ptr();
self.set_write_ptr(base_ptr);
@@ -237,9 +237,9 @@ impl CodeBlock {
}
// Free the grouped pages at once
- let start_ptr = self.mem_block.borrow().start_ptr().add_bytes(page_idx * self.page_size);
+ let start_ptr = self.mem_block.start_ptr().add_bytes(page_idx * self.page_size);
let batch_size = self.page_size * batch_idxs.len();
- self.mem_block.borrow_mut().free_bytes(start_ptr, batch_size as u32);
+ self.mem_block.free_bytes(start_ptr, batch_size as u32);
}
}
@@ -248,13 +248,13 @@ impl CodeBlock {
}
pub fn mapped_region_size(&self) -> usize {
- self.mem_block.borrow().mapped_region_size()
+ self.mem_block.mapped_region_size()
}
/// Size of the region in bytes where writes could be attempted.
#[cfg(target_arch = "aarch64")]
pub fn virtual_region_size(&self) -> usize {
- self.mem_block.borrow().virtual_region_size()
+ self.mem_block.virtual_region_size()
}
/// Return the number of code pages that have been mapped by the VirtualMemory.
@@ -266,7 +266,7 @@ impl CodeBlock {
/// Return the number of code pages that have been reserved by the VirtualMemory.
pub fn num_virtual_pages(&self) -> usize {
- let virtual_region_size = self.mem_block.borrow().virtual_region_size();
+ let virtual_region_size = self.mem_block.virtual_region_size();
// CodeBlock's page size != VirtualMem's page size on Linux,
// so mapped_region_size % self.page_size may not be 0
((virtual_region_size - 1) / self.page_size) + 1
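With hypothetical numbers, a 40 KiB virtual region and 16 KiB code pages give ((40960 - 1) / 16384) + 1 = 3 pages; plain division would count only 2 and drop the partially used last page.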
@@ -366,9 +366,11 @@ impl CodeBlock {
}
/// Add an assembly comment if the feature is on.
- /// If not, this becomes an inline no-op.
- #[cfg(feature = "disasm")]
pub fn add_comment(&mut self, comment: &str) {
+ if !self.keep_comments {
+ return;
+ }
+
let cur_ptr = self.get_write_ptr().raw_addr(self);
// If there's no current list of comments for this line number, add one.
@@ -379,28 +381,21 @@ impl CodeBlock {
this_line_comments.push(comment.to_string());
}
}
- #[cfg(not(feature = "disasm"))]
- #[inline]
- pub fn add_comment(&mut self, _: &str) {}
- #[cfg(feature = "disasm")]
pub fn comments_at(&self, pos: usize) -> Option<&Vec<String>> {
self.asm_comments.get(&pos)
}
- #[allow(unused_variables)]
- #[cfg(feature = "disasm")]
pub fn remove_comments(&mut self, start_addr: CodePtr, end_addr: CodePtr) {
+ if self.asm_comments.is_empty() {
+ return;
+ }
for addr in start_addr.raw_addr(self)..end_addr.raw_addr(self) {
self.asm_comments.remove(&addr);
}
}
- #[cfg(not(feature = "disasm"))]
- #[inline]
- pub fn remove_comments(&mut self, _: CodePtr, _: CodePtr) {}
pub fn clear_comments(&mut self) {
- #[cfg(feature = "disasm")]
self.asm_comments.clear();
}
@@ -413,7 +408,7 @@ impl CodeBlock {
}
pub fn write_mem(&self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> {
- self.mem_block.borrow_mut().write_byte(write_ptr, byte)
+ self.mem_block.write_byte(write_ptr, byte)
}
// Set the current write position
@@ -427,31 +422,31 @@ impl CodeBlock {
// Set the current write position from a pointer
pub fn set_write_ptr(&mut self, code_ptr: CodePtr) {
- let pos = code_ptr.as_offset() - self.mem_block.borrow().start_ptr().as_offset();
+ let pos = code_ptr.as_offset() - self.mem_block.start_ptr().as_offset();
self.set_pos(pos.try_into().unwrap());
}
/// Get a (possibly dangling) direct pointer into the executable memory block
pub fn get_ptr(&self, offset: usize) -> CodePtr {
- self.mem_block.borrow().start_ptr().add_bytes(offset)
+ self.mem_block.start_ptr().add_bytes(offset)
}
/// Convert an address range to memory page indexes against a num_pages()-sized array.
- pub fn addrs_to_pages(&self, start_addr: CodePtr, end_addr: CodePtr) -> Vec<usize> {
- let mem_start = self.mem_block.borrow().start_ptr().raw_addr(self);
- let mem_end = self.mem_block.borrow().mapped_end_ptr().raw_addr(self);
+ pub fn addrs_to_pages(&self, start_addr: CodePtr, end_addr: CodePtr) -> impl Iterator<Item = usize> {
+ let mem_start = self.mem_block.start_ptr().raw_addr(self);
+ let mem_end = self.mem_block.mapped_end_ptr().raw_addr(self);
assert!(mem_start <= start_addr.raw_addr(self));
assert!(start_addr.raw_addr(self) <= end_addr.raw_addr(self));
assert!(end_addr.raw_addr(self) <= mem_end);
// Ignore empty code ranges
if start_addr == end_addr {
- return vec![];
+ return 0..0;
}
let start_page = (start_addr.raw_addr(self) - mem_start) / self.page_size;
let end_page = (end_addr.raw_addr(self) - mem_start - 1) / self.page_size;
- (start_page..=end_page).collect() // TODO: consider returning an iterator
+ start_page..end_page + 1
}
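A hypothetical caller, only to show that the returned range drops in wherever the old Vec<usize> was iterated (mark_page_in_use is a made-up helper, start_addr/end_addr are CodePtrs):

    for page_idx in cb.addrs_to_pages(start_addr, end_addr) {
        mark_page_in_use(page_idx);
    }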
/// Get a (possibly dangling) direct pointer to the current write position
@@ -462,7 +457,7 @@ impl CodeBlock {
/// Write a single byte at the current position.
pub fn write_byte(&mut self, byte: u8) {
let write_ptr = self.get_write_ptr();
- if self.has_capacity(1) && self.mem_block.borrow_mut().write_byte(write_ptr, byte).is_ok() {
+ if self.has_capacity(1) && self.mem_block.write_byte(write_ptr, byte).is_ok() {
self.write_pos += 1;
} else {
self.dropped_bytes = true;
@@ -594,8 +589,12 @@ impl CodeBlock {
self.label_refs = state.label_refs;
}
+ pub fn mark_all_writeable(&mut self) {
+ self.mem_block.mark_all_writeable();
+ }
+
pub fn mark_all_executable(&mut self) {
- self.mem_block.borrow_mut().mark_all_executable();
+ self.mem_block.mark_all_executable();
}
/// Code GC. Free code pages that are not on stack and reuse them.
@@ -691,9 +690,9 @@ impl CodeBlock {
let alloc = TestingAllocator::new(mem_size);
let mem_start: *const u8 = alloc.mem_start();
- let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size);
+ let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size, 128 * 1024 * 1024);
- Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(None))
+ Self::new(Rc::new(virt_mem), false, Rc::new(None), true)
}
/// Stubbed CodeBlock for testing conditions that can arise due to code GC. Can't execute generated code.
@@ -709,9 +708,9 @@ impl CodeBlock {
let alloc = TestingAllocator::new(mem_size);
let mem_start: *const u8 = alloc.mem_start();
- let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size);
+ let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size, 128 * 1024 * 1024);
- Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(Some(freed_pages)))
+ Self::new(Rc::new(virt_mem), false, Rc::new(Some(freed_pages)), true)
}
}
@@ -719,7 +718,7 @@ impl CodeBlock {
impl fmt::LowerHex for CodeBlock {
fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
for pos in 0..self.write_pos {
- let mem_block = &*self.mem_block.borrow();
+ let mem_block = &*self.mem_block;
let byte = unsafe { mem_block.start_ptr().raw_ptr(mem_block).add(pos).read() };
fmtr.write_fmt(format_args!("{:02x}", byte))?;
}
@@ -729,7 +728,7 @@ impl fmt::LowerHex for CodeBlock {
impl crate::virtualmem::CodePtrBase for CodeBlock {
fn base_ptr(&self) -> std::ptr::NonNull<u8> {
- self.mem_block.borrow().base_ptr()
+ self.mem_block.base_ptr()
}
}
@@ -828,7 +827,7 @@ mod tests
assert_eq!(cb.code_size(), 4);
// Moving to the next page should not increase code_size
- cb.next_page(cb.get_write_ptr(), |_, _| {});
+ assert!(cb.next_page(cb.get_write_ptr(), |_, _| {}));
assert_eq!(cb.code_size(), 4);
// Write 4 bytes in the second page
@@ -841,7 +840,7 @@ mod tests
cb.write_bytes(&[1, 1, 1, 1]);
// Moving from an old page to the next page should not increase code_size
- cb.next_page(cb.get_write_ptr(), |_, _| {});
+ assert!(cb.next_page(cb.get_write_ptr(), |_, _| {}));
cb.set_pos(old_write_pos);
assert_eq!(cb.code_size(), 8);
}
diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs
index 20ac13f09c..0ef5e92117 100644
--- a/yjit/src/asm/x86_64/mod.rs
+++ b/yjit/src/asm/x86_64/mod.rs
@@ -952,6 +952,7 @@ pub fn jmp32(cb: &mut CodeBlock, offset: i32) {
pub fn lea(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) {
if let X86Opnd::Reg(reg) = dst {
assert!(reg.num_bits == 64);
+ assert!(matches!(src, X86Opnd::Mem(_) | X86Opnd::IPRel(_)));
write_rm(cb, false, true, dst, src, None, &[0x8d]);
} else {
unreachable!();
@@ -1026,7 +1027,10 @@ pub fn mov(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) {
}
let output_num_bits:u32 = if mem.num_bits > 32 { 32 } else { mem.num_bits.into() };
- assert!(imm_num_bits(imm.value) <= (output_num_bits as u8));
+ assert!(
+ mem.num_bits < 64 || imm_num_bits(imm.value) <= (output_num_bits as u8),
+ "immediate value should be small enough to survive sign extension"
+ );
cb.write_int(imm.value as u64, output_num_bits);
},
// M + UImm
@@ -1041,7 +1045,10 @@ pub fn mov(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) {
}
let output_num_bits = if mem.num_bits > 32 { 32 } else { mem.num_bits.into() };
- assert!(imm_num_bits(uimm.value as i64) <= (output_num_bits as u8));
+ assert!(
+ mem.num_bits < 64 || imm_num_bits(uimm.value as i64) <= (output_num_bits as u8),
+ "immediate value should be small enough to survive sign extension"
+ );
cb.write_int(uimm.value, output_num_bits);
},
// * + Imm/UImm
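A sketch of what the relaxed assertions above allow, mirroring the test added to tests.rs below: a 32-bit destination writes the four immediate bytes as-is, so a value with bit 31 set is fine, while a 64-bit destination would have the 32-bit immediate sign-extended by the CPU and still trips the assert:

    mov(cb, mem_opnd(32, RAX, 4), uimm_opnd(0x8000_0001));    // ok, encodes as c7400401000080
    // mov(cb, mem_opnd(64, RAX, 4), uimm_opnd(0x8000_0001)); // would assert: sign extension would change the value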
diff --git a/yjit/src/asm/x86_64/tests.rs b/yjit/src/asm/x86_64/tests.rs
index 5ae983270f..eefcbfd52e 100644
--- a/yjit/src/asm/x86_64/tests.rs
+++ b/yjit/src/asm/x86_64/tests.rs
@@ -193,6 +193,7 @@ fn test_mov() {
check_bytes("48c7470801000000", |cb| mov(cb, mem_opnd(64, RDI, 8), imm_opnd(1)));
//check_bytes("67c7400411000000", |cb| mov(cb, mem_opnd(32, EAX, 4), imm_opnd(0x34))); // We don't distinguish between EAX and RAX here - that's probably fine?
check_bytes("c7400411000000", |cb| mov(cb, mem_opnd(32, RAX, 4), imm_opnd(17)));
+ check_bytes("c7400401000080", |cb| mov(cb, mem_opnd(32, RAX, 4), uimm_opnd(0x80000001)));
check_bytes("41895814", |cb| mov(cb, mem_opnd(32, R8, 20), EBX));
check_bytes("4d8913", |cb| mov(cb, mem_opnd(64, R11, 0), R10));
check_bytes("48c742f8f4ffffff", |cb| mov(cb, mem_opnd(64, RDX, -8), imm_opnd(-12)));
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index 52d844e121..0521e09d0b 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -98,7 +98,7 @@ fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) {
#[cfg(not(test))]
{
let end = cb.get_write_ptr();
- unsafe { rb_yjit_icache_invalidate(start.raw_ptr(cb) as _, end.raw_ptr(cb) as _) };
+ unsafe { rb_jit_icache_invalidate(start.raw_ptr(cb) as _, end.raw_ptr(cb) as _) };
}
}
@@ -315,7 +315,11 @@ impl Assembler
match opnd {
Opnd::Reg(_) | Opnd::CArg(_) | Opnd::InsnOut { .. } => opnd,
Opnd::Mem(_) => split_load_operand(asm, opnd),
- Opnd::Imm(_) => asm.load(opnd),
+ Opnd::Imm(imm) => if ShiftedImmediate::try_from(imm as u64).is_ok() {
+ opnd
+ } else {
+ asm.load(opnd)
+ }
Opnd::UImm(uimm) => {
if ShiftedImmediate::try_from(uimm).is_ok() {
opnd
@@ -377,7 +381,7 @@ impl Assembler
}
let live_ranges: Vec<usize> = take(&mut self.live_ranges);
- let mut asm_local = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits));
+ let mut asm_local = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits), self.num_locals);
let asm = &mut asm_local;
let mut iterator = self.into_draining_iter();
@@ -427,14 +431,62 @@ impl Assembler
}
}
},
- Insn::And { left, right, .. } |
- Insn::Or { left, right, .. } |
- Insn::Xor { left, right, .. } => {
+ Insn::And { left, right, out } |
+ Insn::Or { left, right, out } |
+ Insn::Xor { left, right, out } => {
let (opnd0, opnd1) = split_boolean_operands(asm, *left, *right);
*left = opnd0;
*right = opnd1;
+
+            // Since these instructions are lowered to an instruction that has 2 input
+            // registers and an output register, look to merge with an `Insn::Mov` that
+            // follows, which puts the output in another register. For example:
+ // `Add a, b => out` followed by `Mov c, out` becomes `Add a, b => c`.
+ if let (Opnd::Reg(_), Opnd::Reg(_), Some(Insn::Mov { dest, src })) = (left, right, iterator.peek()) {
+ if live_ranges[index] == index + 1 {
+ // Check after potentially lowering a stack operand to a register operand
+ let lowered_dest = if let Opnd::Stack { .. } = dest {
+ asm.lower_stack_opnd(dest)
+ } else {
+ *dest
+ };
+ if out == src && matches!(lowered_dest, Opnd::Reg(_)) {
+ *out = lowered_dest;
+ iterator.map_insn_index(asm);
+ iterator.next_unmapped(); // Pop merged Insn::Mov
+ }
+ }
+ }
+
asm.push_insn(insn);
- },
+ }
+            // Lower compare-with-zero-and-branch sequences to Joz/Jonz so they can later be emitted as CBZ/CBNZ.
+ ref insn @ Insn::Cmp { ref left, right: ref right @ (Opnd::UImm(0) | Opnd::Imm(0)) } |
+ ref insn @ Insn::Test { ref left, right: ref right @ (Opnd::InsnOut { .. } | Opnd::Reg(_)) } if {
+ let same_opnd_if_test = if let Insn::Test { .. } = insn {
+ left == right
+ } else {
+ true
+ };
+
+ same_opnd_if_test && if let Some(
+ Insn::Jz(target) | Insn::Je(target) | Insn::Jnz(target) | Insn::Jne(target)
+ ) = iterator.peek() {
+ matches!(target, Target::SideExit { .. })
+ } else {
+ false
+ }
+ } => {
+ let reg = split_load_operand(asm, *left);
+ match iterator.peek() {
+ Some(Insn::Jz(target) | Insn::Je(target)) => asm.push_insn(Insn::Joz(reg, *target)),
+ Some(Insn::Jnz(target) | Insn::Jne(target)) => asm.push_insn(Insn::Jonz(reg, *target)),
+ _ => ()
+ }
+
+ iterator.map_insn_index(asm);
+ iterator.next_unmapped(); // Pop merged jump instruction
+ }
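Roughly, the rewrite this arm performs on the IR, in the same notation the comments above use (a sketch, not an exhaustive list of matched shapes):

    Cmp v, 0   (or Test v, v)
    Jz/Je  side_exit   =>  Joz  v, side_exit   // later emitted as CBZ
    Jnz/Jne side_exit  =>  Jonz v, side_exit   // later emitted as CBNZ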
Insn::CCall { opnds, fptr, .. } => {
assert!(opnds.len() <= C_ARG_OPNDS.len());
@@ -655,7 +707,7 @@ impl Assembler
},
Insn::Mul { left, right, .. } => {
let opnd0 = split_load_operand(asm, *left);
- let opnd1 = split_shifted_immediate(asm, *right);
+ let opnd1 = split_load_operand(asm, *right);
asm.mul(opnd0, opnd1);
},
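The switch to split_load_operand here reflects that AArch64 MUL has no immediate form: an immediate right-hand side has to be materialized in a register first, which is exactly the mov x11, #3 / mul pair shown by test_mul_with_immediate further down.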
Insn::Test { left, right } => {
@@ -787,14 +839,52 @@ impl Assembler
};
}
- /// Emit a push instruction for the given operand by adding to the stack
- /// pointer and then storing the given value.
+ /// Emit a CBZ or CBNZ which branches when a register is zero or non-zero
+ fn emit_cmp_zero_jump(cb: &mut CodeBlock, reg: A64Opnd, branch_if_zero: bool, target: Target) {
+ if let Target::SideExitPtr(dst_ptr) = target {
+ let dst_addr = dst_ptr.as_offset();
+ let src_addr = cb.get_write_ptr().as_offset();
+
+ if cmp_branch_offset_fits_bits((dst_addr - src_addr) / 4) {
+ // If the offset fits in one instruction, generate cbz or cbnz
+ let bytes = (dst_addr - src_addr) as i32;
+ if branch_if_zero {
+ cbz(cb, reg, InstructionOffset::from_bytes(bytes));
+ } else {
+ cbnz(cb, reg, InstructionOffset::from_bytes(bytes));
+ }
+ } else {
+ // Otherwise, we load the address into a register and
+ // use the branch register instruction. Note that because
+ // side exits should always be close, this form should be
+ // rare or impossible to see.
+ let dst_addr = dst_ptr.raw_addr(cb) as u64;
+ let load_insns: i32 = emit_load_size(dst_addr).into();
+
+ // Write out the inverse condition so that if
+ // it doesn't match it will skip over the
+ // instructions used for branching.
+ if branch_if_zero {
+ cbnz(cb, reg, InstructionOffset::from_insns(load_insns + 2));
+ } else {
+ cbz(cb, reg, InstructionOffset::from_insns(load_insns + 2));
+ }
+ emit_load_value(cb, Assembler::SCRATCH0, dst_addr);
+ br(cb, Assembler::SCRATCH0);
+
+ }
+ } else {
+ unreachable!("We should only generate Joz/Jonz with side-exit targets");
+ }
+ }
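For the far path, the emitted sequence is roughly (reading it off the code above, with N standing for emit_load_size): an inverse cbnz/cbz that skips N + 2 instructions, the N-instruction load of the exit address into SCRATCH0, then br SCRATCH0, so the compare falls through to the br only when the requested condition actually holds.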
+
+ /// Push a value to the stack by subtracting from the stack pointer then storing,
+ /// leaving an 8-byte gap for alignment.
fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) {
str_pre(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, -C_SP_STEP));
}
- /// Emit a pop instruction into the given operand by loading the value
- /// and then subtracting from the stack pointer.
+ /// Pop a value from the stack by loading `[sp]` then adding to the stack pointer.
fn emit_pop(cb: &mut CodeBlock, opnd: A64Opnd) {
ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP));
}
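Both helpers move the stack pointer by C_SP_STEP while transferring a single 8-byte register, which is where the alignment gap mentioned above comes from (assuming C_SP_STEP is the usual 16-byte AAPCS64 stack step); pushes and pops therefore have to stay paired so the gaps cancel out.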
@@ -833,9 +923,7 @@ impl Assembler
match insn {
Insn::Comment(text) => {
- if cfg!(feature = "disasm") {
- cb.add_comment(text);
- }
+ cb.add_comment(text);
},
Insn::Label(target) => {
cb.write_label(target.unwrap_label_idx());
@@ -1066,8 +1154,8 @@ impl Assembler
let regs = Assembler::get_caller_save_regs();
// Pop the state/flags register
- msr(cb, SystemRegister::NZCV, Self::SCRATCH0);
emit_pop(cb, Self::SCRATCH0);
+ msr(cb, SystemRegister::NZCV, Self::SCRATCH0);
for reg in regs.into_iter().rev() {
emit_pop(cb, A64Opnd::Reg(reg));
@@ -1135,6 +1223,9 @@ impl Assembler
Insn::Jg(target) => {
emit_conditional_jump::<{Condition::GT}>(cb, compile_side_exit(*target, self, ocb)?);
},
+ Insn::Jge(target) => {
+ emit_conditional_jump::<{Condition::GE}>(cb, compile_side_exit(*target, self, ocb)?);
+ },
Insn::Jbe(target) => {
emit_conditional_jump::<{Condition::LS}>(cb, compile_side_exit(*target, self, ocb)?);
},
@@ -1144,6 +1235,12 @@ impl Assembler
Insn::Jo(target) => {
emit_conditional_jump::<{Condition::VS}>(cb, compile_side_exit(*target, self, ocb)?);
},
+ Insn::Joz(opnd, target) => {
+ emit_cmp_zero_jump(cb, opnd.into(), true, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Jonz(opnd, target) => {
+ emit_cmp_zero_jump(cb, opnd.into(), false, compile_side_exit(*target, self, ocb)?);
+ },
Insn::IncrCounter { mem, value } => {
let label = cb.new_label("incr_counter_loop".to_string());
cb.write_label(label);
@@ -1243,16 +1340,13 @@ impl Assembler
Err(EmitError::RetryOnNextPage) => {
// we want to lower jumps to labels to b.cond instructions, which have a 1 MiB
// range limit. We can easily exceed the limit in case the jump straddles two pages.
- // In this case, we retry with a fresh page.
+ // In this case, we retry with a fresh page once.
cb.set_label_state(starting_label_state);
- cb.next_page(start_ptr, emit_jmp_ptr_with_invalidation);
- let result = asm.arm64_emit(cb, &mut ocb);
- assert_ne!(
- Err(EmitError::RetryOnNextPage),
- result,
- "should not fail when writing to a fresh code page"
- );
- result
+ if cb.next_page(start_ptr, emit_jmp_ptr_with_invalidation) {
+ asm.arm64_emit(cb, &mut ocb)
+ } else {
+ Err(EmitError::OutOfMemory)
+ }
}
result => result
};
@@ -1266,7 +1360,7 @@ impl Assembler
#[cfg(not(test))]
cb.without_page_end_reserve(|cb| {
for (start, end) in cb.writable_addrs(start_ptr, cb.get_write_ptr()) {
- unsafe { rb_yjit_icache_invalidate(start as _, end as _) };
+ unsafe { rb_jit_icache_invalidate(start as _, end as _) };
}
});
@@ -1285,7 +1379,7 @@ mod tests {
use crate::disasm::*;
fn setup_asm() -> (Assembler, CodeBlock) {
- (Assembler::new(), CodeBlock::new_dummy(1024))
+ (Assembler::new(0), CodeBlock::new_dummy(1024))
}
#[test]
@@ -1324,7 +1418,7 @@ mod tests {
fn test_emit_cpop_all() {
let (mut asm, mut cb) = setup_asm();
- asm.cpop_all();
+ asm.cpop_all(crate::core::RegMapping::default());
asm.compile_with_num_regs(&mut cb, 0);
}
@@ -1584,7 +1678,7 @@ mod tests {
#[test]
fn test_bcond_straddling_code_pages() {
const LANDING_PAGE: usize = 65;
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new(0);
let mut cb = CodeBlock::new_dummy_with_freed_pages(vec![0, LANDING_PAGE]);
// Skip to near the end of the page. Room for two instructions.
@@ -1701,4 +1795,35 @@ mod tests {
0x8: csel x1, x11, x12, lt
"});
}
+
+ #[test]
+ fn test_add_with_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let out = asm.add(Opnd::Reg(TEMP_REGS[1]), 1.into());
+ let out = asm.add(out, 1_usize.into());
+ asm.mov(Opnd::Reg(TEMP_REGS[0]), out);
+ asm.compile_with_num_regs(&mut cb, 2);
+
+ assert_disasm!(cb, "2b0500b16b0500b1e1030baa", {"
+ 0x0: adds x11, x9, #1
+ 0x4: adds x11, x11, #1
+ 0x8: mov x1, x11
+ "});
+ }
+
+ #[test]
+ fn test_mul_with_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let out = asm.mul(Opnd::Reg(TEMP_REGS[1]), 3.into());
+ asm.mov(Opnd::Reg(TEMP_REGS[0]), out);
+ asm.compile_with_num_regs(&mut cb, 2);
+
+ assert_disasm!(cb, "6b0080d22b7d0b9be1030baa", {"
+ 0x0: mov x11, #3
+ 0x4: mul x11, x9, x11
+ 0x8: mov x1, x11
+ "});
+ }
}
diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs
index 613e7048d4..3fb67bc7cc 100644
--- a/yjit/src/backend/ir.rs
+++ b/yjit/src/backend/ir.rs
@@ -2,11 +2,11 @@ use std::collections::HashMap;
use std::fmt;
use std::convert::From;
use std::mem::take;
-use crate::codegen::{gen_outlined_exit, gen_counted_exit};
-use crate::cruby::{VALUE, SIZEOF_VALUE_I32};
-use crate::virtualmem::{CodePtr};
+use crate::codegen::{gen_counted_exit, gen_outlined_exit};
+use crate::cruby::{vm_stack_canary, SIZEOF_VALUE_I32, VALUE, VM_ENV_DATA_SIZE};
+use crate::virtualmem::CodePtr;
use crate::asm::{CodeBlock, OutlinedCb};
-use crate::core::{Context, RegTemps, MAX_REG_TEMPS};
+use crate::core::{Context, RegMapping, RegOpnd, MAX_CTX_TEMPS};
use crate::options::*;
use crate::stats::*;
@@ -77,10 +77,12 @@ pub enum Opnd
num_bits: u8,
/// ctx.stack_size when this operand is made. Used with idx for Opnd::Reg.
stack_size: u8,
+ /// The number of local variables in the current ISEQ. Used only for locals.
+ num_locals: Option<u32>,
/// ctx.sp_offset when this operand is made. Used with idx for Opnd::Mem.
sp_offset: i8,
- /// ctx.reg_temps when this operand is read. Used for register allocation.
- reg_temps: Option<RegTemps>
+ /// ctx.reg_mapping when this operand is read. Used for register allocation.
+ reg_mapping: Option<RegMapping>
},
// Low-level operands, for lowering
@@ -172,7 +174,7 @@ impl Opnd
Opnd::Reg(reg) => Some(Opnd::Reg(reg.with_num_bits(num_bits))),
Opnd::Mem(Mem { base, disp, .. }) => Some(Opnd::Mem(Mem { base, disp, num_bits })),
Opnd::InsnOut { idx, .. } => Some(Opnd::InsnOut { idx, num_bits }),
- Opnd::Stack { idx, stack_size, sp_offset, reg_temps, .. } => Some(Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps }),
+ Opnd::Stack { idx, stack_size, num_locals, sp_offset, reg_mapping, .. } => Some(Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, reg_mapping }),
_ => None,
}
}
@@ -227,23 +229,26 @@ impl Opnd
Self::match_num_bits_iter(opnds.iter())
}
- /// Calculate Opnd::Stack's index from the stack bottom.
- pub fn stack_idx(&self) -> u8 {
- match self {
- Opnd::Stack { idx, stack_size, .. } => {
- (*stack_size as isize - *idx as isize - 1) as u8
- },
- _ => unreachable!(),
- }
+    /// Convert Opnd::Stack into a RegOpnd
+ pub fn reg_opnd(&self) -> RegOpnd {
+ self.get_reg_opnd().unwrap()
}
- /// Get the index for stack temp registers.
- pub fn reg_idx(&self) -> usize {
- match self {
- Opnd::Stack { .. } => {
- self.stack_idx() as usize % get_option!(num_temp_regs)
- },
- _ => unreachable!(),
+    /// Convert an operand into a RegOpnd if it's Opnd::Stack
+ pub fn get_reg_opnd(&self) -> Option<RegOpnd> {
+ match *self {
+ Opnd::Stack { idx, stack_size, num_locals, .. } => Some(
+ if let Some(num_locals) = num_locals {
+ let last_idx = stack_size as i32 + VM_ENV_DATA_SIZE as i32 - 1;
+ assert!(last_idx <= idx, "Local index {} must be >= last local index {}", idx, last_idx);
+                    assert!(idx <= last_idx + num_locals as i32, "Local index {} must be <= last local index {} + local size {}", idx, last_idx, num_locals);
+ RegOpnd::Local((last_idx + num_locals as i32 - idx) as u8)
+ } else {
+ assert!(idx < stack_size as i32);
+ RegOpnd::Stack((stack_size as i32 - idx - 1) as u8)
+ }
+ ),
+ _ => None,
}
}
}
@@ -423,6 +428,9 @@ pub enum Insn {
/// Jump if greater
Jg(Target),
+ /// Jump if greater or equal
+ Jge(Target),
+
// Unconditional jump to a branch target
Jmp(Target),
@@ -444,6 +452,12 @@ pub enum Insn {
/// Jump if zero
Jz(Target),
+ /// Jump if operand is zero (only used during lowering at the moment)
+ Joz(Opnd, Target),
+
+ /// Jump if operand is non-zero (only used during lowering at the moment)
+ Jonz(Opnd, Target),
+
// Add a label into the IR at the point that this instruction is added.
Label(Target),
@@ -514,13 +528,13 @@ pub enum Insn {
impl Insn {
/// Create an iterator that will yield a non-mutable reference to each
/// operand in turn for this instruction.
- pub(super) fn opnd_iter(&self) -> InsnOpndIterator {
+ pub(super) fn opnd_iter(&self) -> InsnOpndIterator<'_> {
InsnOpndIterator::new(self)
}
/// Create an iterator that will yield a mutable reference to each operand
/// in turn for this instruction.
- pub(super) fn opnd_iter_mut(&mut self) -> InsnOpndMutIterator {
+ pub(super) fn opnd_iter_mut(&mut self) -> InsnOpndMutIterator<'_> {
InsnOpndMutIterator::new(self)
}
@@ -532,12 +546,16 @@ impl Insn {
Insn::Je(target) |
Insn::Jl(target) |
Insn::Jg(target) |
+ Insn::Jge(target) |
Insn::Jmp(target) |
Insn::Jne(target) |
Insn::Jnz(target) |
Insn::Jo(target) |
Insn::Jz(target) |
Insn::Label(target) |
+ Insn::JoMul(target) |
+ Insn::Joz(_, target) |
+ Insn::Jonz(_, target) |
Insn::LeaJumpTarget { target, .. } => {
Some(target)
}
@@ -578,6 +596,7 @@ impl Insn {
Insn::Je(_) => "Je",
Insn::Jl(_) => "Jl",
Insn::Jg(_) => "Jg",
+ Insn::Jge(_) => "Jge",
Insn::Jmp(_) => "Jmp",
Insn::JmpOpnd(_) => "JmpOpnd",
Insn::Jne(_) => "Jne",
@@ -585,6 +604,8 @@ impl Insn {
Insn::Jo(_) => "Jo",
Insn::JoMul(_) => "JoMul",
Insn::Jz(_) => "Jz",
+ Insn::Joz(..) => "Joz",
+ Insn::Jonz(..) => "Jonz",
Insn::Label(_) => "Label",
Insn::LeaJumpTarget { .. } => "LeaJumpTarget",
Insn::Lea { .. } => "Lea",
@@ -682,6 +703,7 @@ impl Insn {
Insn::Je(target) |
Insn::Jl(target) |
Insn::Jg(target) |
+ Insn::Jge(target) |
Insn::Jmp(target) |
Insn::Jne(target) |
Insn::Jnz(target) |
@@ -733,6 +755,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
Insn::Je(_) |
Insn::Jl(_) |
Insn::Jg(_) |
+ Insn::Jge(_) |
Insn::Jmp(_) |
Insn::Jne(_) |
Insn::Jnz(_) |
@@ -743,6 +766,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
Insn::LeaJumpTarget { .. } |
Insn::PadInvalPatch |
Insn::PosMarker(_) => None,
+
Insn::CPopInto(opnd) |
Insn::CPush(opnd) |
Insn::CRet(opnd) |
@@ -751,6 +775,8 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
Insn::LiveReg { opnd, .. } |
Insn::Load { opnd, .. } |
Insn::LoadSExt { opnd, .. } |
+ Insn::Joz(opnd, _) |
+ Insn::Jonz(opnd, _) |
Insn::Not { opnd, .. } => {
match self.idx {
0 => {
@@ -834,6 +860,7 @@ impl<'a> InsnOpndMutIterator<'a> {
Insn::Je(_) |
Insn::Jl(_) |
Insn::Jg(_) |
+ Insn::Jge(_) |
Insn::Jmp(_) |
Insn::Jne(_) |
Insn::Jnz(_) |
@@ -844,6 +871,7 @@ impl<'a> InsnOpndMutIterator<'a> {
Insn::LeaJumpTarget { .. } |
Insn::PadInvalPatch |
Insn::PosMarker(_) => None,
+
Insn::CPopInto(opnd) |
Insn::CPush(opnd) |
Insn::CRet(opnd) |
@@ -852,6 +880,8 @@ impl<'a> InsnOpndMutIterator<'a> {
Insn::LiveReg { opnd, .. } |
Insn::Load { opnd, .. } |
Insn::LoadSExt { opnd, .. } |
+ Insn::Joz(opnd, _) |
+ Insn::Jonz(opnd, _) |
Insn::Not { opnd, .. } => {
match self.idx {
0 => {
@@ -944,8 +974,9 @@ pub struct SideExitContext {
/// Context fields used by get_generic_ctx()
pub stack_size: u8,
pub sp_offset: i8,
- pub reg_temps: RegTemps,
+ pub reg_mapping: RegMapping,
pub is_return_landing: bool,
+ pub is_deferred: bool,
}
impl SideExitContext {
@@ -955,8 +986,9 @@ impl SideExitContext {
pc,
stack_size: ctx.get_stack_size(),
sp_offset: ctx.get_sp_offset(),
- reg_temps: ctx.get_reg_temps(),
+ reg_mapping: ctx.get_reg_mapping(),
is_return_landing: ctx.is_return_landing(),
+ is_deferred: ctx.is_deferred(),
};
if cfg!(debug_assertions) {
// Assert that we're not losing any mandatory metadata
@@ -970,10 +1002,13 @@ impl SideExitContext {
let mut ctx = Context::default();
ctx.set_stack_size(self.stack_size);
ctx.set_sp_offset(self.sp_offset);
- ctx.set_reg_temps(self.reg_temps);
+ ctx.set_reg_mapping(self.reg_mapping);
if self.is_return_landing {
ctx.set_as_return_landing();
}
+ if self.is_deferred {
+ ctx.mark_as_deferred();
+ }
ctx
}
}
@@ -996,6 +1031,13 @@ pub struct Assembler {
/// Context for generating the current insn
pub ctx: Context,
+ /// The current ISEQ's local table size. asm.local_opnd() uses this, and it's
+ /// sometimes hard to pass this value, e.g. asm.spill_regs() in asm.ccall().
+ ///
+ /// `None` means we're not assembling for an ISEQ, or that the local size is
+ /// not relevant.
+ pub(super) num_locals: Option<u32>,
+
/// Side exit caches for each SideExitContext
pub(super) side_exits: HashMap<SideExitContext, CodePtr>,
@@ -1004,23 +1046,42 @@ pub struct Assembler {
/// Stack size for Target::SideExit
side_exit_stack_size: Option<u8>,
+
+ /// If true, the next ccall() should verify its leafness
+ leaf_ccall: bool,
}
impl Assembler
{
- pub fn new() -> Self {
- Self::new_with_label_names(Vec::default(), HashMap::default())
+ /// Create an Assembler for ISEQ-specific code.
+ /// It includes all inline code and some outlined code like side exits and stubs.
+ pub fn new(num_locals: u32) -> Self {
+ Self::new_with_label_names(Vec::default(), HashMap::default(), Some(num_locals))
+ }
+
+ /// Create an Assembler for outlined code that is not specific to any ISEQ,
+ /// e.g. trampolines that are shared globally.
+ pub fn new_without_iseq() -> Self {
+ Self::new_with_label_names(Vec::default(), HashMap::default(), None)
}
- pub fn new_with_label_names(label_names: Vec<String>, side_exits: HashMap<SideExitContext, CodePtr>) -> Self {
+ /// Create an Assembler with parameters that are populated by another Assembler instance.
+ /// This API is used for copying an Assembler for the next compiler pass.
+ pub fn new_with_label_names(
+ label_names: Vec<String>,
+ side_exits: HashMap<SideExitContext, CodePtr>,
+ num_locals: Option<u32>
+ ) -> Self {
Self {
insns: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY),
live_ranges: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY),
label_names,
ctx: Context::default(),
+ num_locals,
side_exits,
side_exit_pc: None,
side_exit_stack_size: None,
+ leaf_ccall: false,
}
}
@@ -1030,6 +1091,11 @@ impl Assembler
&TEMP_REGS[0..num_regs]
}
+ /// Get the number of locals for the ISEQ being compiled
+ pub fn get_num_locals(&self) -> Option<u32> {
+ self.num_locals
+ }
+
/// Set a context for generating side exits
pub fn set_side_exit_context(&mut self, pc: *mut VALUE, stack_size: u8) {
self.side_exit_pc = Some(pc);
@@ -1051,25 +1117,32 @@ impl Assembler
let mut opnd_iter = insn.opnd_iter_mut();
while let Some(opnd) = opnd_iter.next() {
- match opnd {
+ match *opnd {
// If we find any InsnOut from previous instructions, we're going to update
// the live range of the previous instruction to point to this one.
Opnd::InsnOut { idx, .. } => {
- assert!(*idx < self.insns.len());
- self.live_ranges[*idx] = insn_idx;
+ assert!(idx < self.insns.len());
+ self.live_ranges[idx] = insn_idx;
}
Opnd::Mem(Mem { base: MemBase::InsnOut(idx), .. }) => {
- assert!(*idx < self.insns.len());
- self.live_ranges[*idx] = insn_idx;
+ assert!(idx < self.insns.len());
+ self.live_ranges[idx] = insn_idx;
}
- // Set current ctx.reg_temps to Opnd::Stack.
- Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps: None } => {
+ // Set current ctx.reg_mapping to Opnd::Stack.
+ Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, reg_mapping: None } => {
+ assert_eq!(
+ self.ctx.get_stack_size() as i16 - self.ctx.get_sp_offset() as i16,
+ stack_size as i16 - sp_offset as i16,
+ "Opnd::Stack (stack_size: {}, sp_offset: {}) expects a different SP position from asm.ctx (stack_size: {}, sp_offset: {})",
+ stack_size, sp_offset, self.ctx.get_stack_size(), self.ctx.get_sp_offset(),
+ );
*opnd = Opnd::Stack {
- idx: *idx,
- num_bits: *num_bits,
- stack_size: *stack_size,
- sp_offset: *sp_offset,
- reg_temps: Some(self.ctx.get_reg_temps()),
+ idx,
+ num_bits,
+ stack_size,
+ num_locals,
+ sp_offset,
+ reg_mapping: Some(self.ctx.get_reg_mapping()),
};
}
_ => {}
@@ -1096,7 +1169,7 @@ impl Assembler
// Get a cached side exit
let side_exit = match self.side_exits.get(&side_exit_context) {
None => {
- let exit_code = gen_outlined_exit(side_exit_context.pc, &side_exit_context.get_ctx(), ocb)?;
+ let exit_code = gen_outlined_exit(side_exit_context.pc, self.num_locals.unwrap(), &side_exit_context.get_ctx(), ocb)?;
self.side_exits.insert(*side_exit_context, exit_code);
exit_code
}
@@ -1104,7 +1177,7 @@ impl Assembler
};
// Wrap a counter if needed
- gen_counted_exit(side_exit, ocb, counter)
+ gen_counted_exit(side_exit_context.pc, side_exit, ocb, counter)
}
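
This lookup is what makes side exits shareable: one exit is generated per distinct SideExitContext and reused afterwards. A minimal standalone sketch of that caching shape, using illustrative stand-ins (ExitKey is not the real SideExitContext and CodePtr is just an address here):

    use std::collections::HashMap;

    // Illustrative key: only the fields that affect the exit code.
    #[derive(Clone, Copy, PartialEq, Eq, Hash)]
    struct ExitKey { pc: usize, stack_size: u8, sp_offset: i8 }

    type CodePtr = usize;

    fn main() {
        let mut cache: HashMap<ExitKey, CodePtr> = HashMap::new();
        let mut next_addr: CodePtr = 0x1000;

        let key = ExitKey { pc: 0xabc, stack_size: 2, sp_offset: 2 };
        for _ in 0..2 {
            // Miss: "generate" a new exit. Hit: reuse the cached pointer.
            let exit = *cache.entry(key).or_insert_with(|| {
                let addr = next_addr;
                next_addr += 0x10;
                addr
            });
            assert_eq!(exit, 0x1000);
        }
        assert_eq!(cache.len(), 1); // the exit was generated exactly once
    }
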
/// Create a new label instance that we can jump to
@@ -1130,20 +1203,20 @@ impl Assembler
}
// Convert Opnd::Stack to Opnd::Reg
- fn reg_opnd(opnd: &Opnd) -> Opnd {
+ fn reg_opnd(opnd: &Opnd, reg_idx: usize) -> Opnd {
let regs = Assembler::get_temp_regs();
if let Opnd::Stack { num_bits, .. } = *opnd {
incr_counter!(temp_reg_opnd);
- Opnd::Reg(regs[opnd.reg_idx()]).with_num_bits(num_bits).unwrap()
+ Opnd::Reg(regs[reg_idx]).with_num_bits(num_bits).unwrap()
} else {
unreachable!()
}
}
match opnd {
- Opnd::Stack { reg_temps, .. } => {
- if opnd.stack_idx() < MAX_REG_TEMPS && reg_temps.unwrap().get(opnd.stack_idx()) {
- reg_opnd(opnd)
+ Opnd::Stack { reg_mapping, .. } => {
+ if let Some(reg_idx) = reg_mapping.unwrap().get_reg(opnd.reg_opnd()) {
+ reg_opnd(opnd, reg_idx)
} else {
mem_opnd(opnd)
}
@@ -1153,62 +1226,111 @@ impl Assembler
}
/// Allocate a register to a stack temp if available.
- pub fn alloc_temp_reg(&mut self, stack_idx: u8) {
- if get_option!(num_temp_regs) == 0 {
- return;
+ pub fn alloc_reg(&mut self, mapping: RegOpnd) {
+ // Allocate a register if there's no conflict.
+ let mut reg_mapping = self.ctx.get_reg_mapping();
+ if reg_mapping.alloc_reg(mapping) {
+ self.set_reg_mapping(reg_mapping);
}
+ }
- // Allocate a register if there's no conflict.
- let mut reg_temps = self.ctx.get_reg_temps();
- if reg_temps.conflicts_with(stack_idx) {
- assert!(!reg_temps.get(stack_idx));
- } else {
- reg_temps.set(stack_idx, true);
- self.set_reg_temps(reg_temps);
+ /// Erase local variable type information
+ /// e.g. because of a call whose effects we can't track
+ pub fn clear_local_types(&mut self) {
+ asm_comment!(self, "clear local variable types");
+ self.ctx.clear_local_types();
+ }
+
+ /// Repurpose stack temp registers to the corresponding locals for arguments
+ pub fn map_temp_regs_to_args(&mut self, callee_ctx: &mut Context, argc: i32) -> Vec<RegOpnd> {
+ let mut callee_reg_mapping = callee_ctx.get_reg_mapping();
+ let mut mapped_temps = vec![];
+
+ for arg_idx in 0..argc {
+ let stack_idx: u8 = (self.ctx.get_stack_size() as i32 - argc + arg_idx).try_into().unwrap();
+ let temp_opnd = RegOpnd::Stack(stack_idx);
+
+ // For each argument, if the stack temp for it has a register,
+ // let the callee use the register for the local variable.
+ if let Some(reg_idx) = self.ctx.get_reg_mapping().get_reg(temp_opnd) {
+ let local_opnd = RegOpnd::Local(arg_idx.try_into().unwrap());
+ callee_reg_mapping.set_reg(local_opnd, reg_idx);
+ mapped_temps.push(temp_opnd);
+ }
}
+
+ asm_comment!(self, "local maps: {:?}", callee_reg_mapping);
+ callee_ctx.set_reg_mapping(callee_reg_mapping);
+ mapped_temps
}
- /// Spill all live stack temps from registers to the stack
- pub fn spill_temps(&mut self) {
+ /// Spill all live registers to the stack
+ pub fn spill_regs(&mut self) {
+ self.spill_regs_except(&vec![]);
+ }
+
+ /// Spill all live registers except `ignored_temps` to the stack
+ pub fn spill_regs_except(&mut self, ignored_temps: &Vec<RegOpnd>) {
// Forget registers above the stack top
- let mut reg_temps = self.ctx.get_reg_temps();
- for stack_idx in self.ctx.get_stack_size()..MAX_REG_TEMPS {
- reg_temps.set(stack_idx, false);
+ let mut reg_mapping = self.ctx.get_reg_mapping();
+ for stack_idx in self.ctx.get_stack_size()..MAX_CTX_TEMPS as u8 {
+ reg_mapping.dealloc_reg(RegOpnd::Stack(stack_idx));
}
- self.set_reg_temps(reg_temps);
-
- // Spill live stack temps
- if self.ctx.get_reg_temps() != RegTemps::default() {
- asm_comment!(self, "spill_temps: {:08b} -> {:08b}", self.ctx.get_reg_temps().as_u8(), RegTemps::default().as_u8());
- for stack_idx in 0..u8::min(MAX_REG_TEMPS, self.ctx.get_stack_size()) {
- if self.ctx.get_reg_temps().get(stack_idx) {
- let idx = self.ctx.get_stack_size() - 1 - stack_idx;
- self.spill_temp(self.stack_opnd(idx.into()));
- reg_temps.set(stack_idx, false);
- }
+ self.set_reg_mapping(reg_mapping);
+
+ // If no registers are in use, skip all checks
+ if self.ctx.get_reg_mapping() == RegMapping::default() {
+ return;
+ }
+
+ // Collect stack temps to be spilled
+ let mut spilled_opnds = vec![];
+ for stack_idx in 0..u8::min(MAX_CTX_TEMPS as u8, self.ctx.get_stack_size()) {
+ let reg_opnd = RegOpnd::Stack(stack_idx);
+ if !ignored_temps.contains(&reg_opnd) && reg_mapping.dealloc_reg(reg_opnd) {
+ let idx = self.ctx.get_stack_size() - 1 - stack_idx;
+ let spilled_opnd = self.stack_opnd(idx.into());
+ spilled_opnds.push(spilled_opnd);
+ reg_mapping.dealloc_reg(spilled_opnd.reg_opnd());
+ }
+ }
+
+ // Collect locals to be spilled
+ for local_idx in 0..MAX_CTX_TEMPS as u8 {
+ if reg_mapping.dealloc_reg(RegOpnd::Local(local_idx)) {
+ let first_local_ep_offset = self.num_locals.unwrap() + VM_ENV_DATA_SIZE - 1;
+ let ep_offset = first_local_ep_offset - local_idx as u32;
+ let spilled_opnd = self.local_opnd(ep_offset);
+ spilled_opnds.push(spilled_opnd);
+ reg_mapping.dealloc_reg(spilled_opnd.reg_opnd());
}
- self.ctx.set_reg_temps(reg_temps);
}
- // Every stack temp should have been spilled
- assert_eq!(self.ctx.get_reg_temps(), RegTemps::default());
+ // Spill stack temps and locals
+ if !spilled_opnds.is_empty() {
+ asm_comment!(self, "spill_regs: {:?} -> {:?}", self.ctx.get_reg_mapping(), reg_mapping);
+ for &spilled_opnd in spilled_opnds.iter() {
+ self.spill_reg(spilled_opnd);
+ }
+ self.ctx.set_reg_mapping(reg_mapping);
+ }
}
/// Spill a stack temp from a register to the stack
- fn spill_temp(&mut self, opnd: Opnd) {
- assert!(self.ctx.get_reg_temps().get(opnd.stack_idx()));
+ pub fn spill_reg(&mut self, opnd: Opnd) {
+ assert_ne!(self.ctx.get_reg_mapping().get_reg(opnd.reg_opnd()), None);
- // Use different RegTemps for dest and src operands
- let reg_temps = self.ctx.get_reg_temps();
- let mut mem_temps = reg_temps;
- mem_temps.set(opnd.stack_idx(), false);
+ // Use different RegMappings for dest and src operands
+ let reg_mapping = self.ctx.get_reg_mapping();
+ let mut mem_mappings = reg_mapping;
+ mem_mappings.dealloc_reg(opnd.reg_opnd());
// Move the stack operand from a register to memory
match opnd {
- Opnd::Stack { idx, num_bits, stack_size, sp_offset, .. } => {
+ Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, .. } => {
self.mov(
- Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps: Some(mem_temps) },
- Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps: Some(reg_temps) },
+ Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, reg_mapping: Some(mem_mappings) },
+ Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, reg_mapping: Some(reg_mapping) },
);
}
_ => unreachable!(),
@@ -1217,21 +1339,47 @@ impl Assembler
}
/// Update which stack temps are in a register
- pub fn set_reg_temps(&mut self, reg_temps: RegTemps) {
- if self.ctx.get_reg_temps() != reg_temps {
- asm_comment!(self, "reg_temps: {:08b} -> {:08b}", self.ctx.get_reg_temps().as_u8(), reg_temps.as_u8());
- self.ctx.set_reg_temps(reg_temps);
- self.verify_reg_temps();
+ pub fn set_reg_mapping(&mut self, reg_mapping: RegMapping) {
+ if self.ctx.get_reg_mapping() != reg_mapping {
+ asm_comment!(self, "reg_mapping: {:?} -> {:?}", self.ctx.get_reg_mapping(), reg_mapping);
+ self.ctx.set_reg_mapping(reg_mapping);
}
}
- /// Assert there's no conflict in stack temp register allocation
- fn verify_reg_temps(&self) {
- for stack_idx in 0..MAX_REG_TEMPS {
- if self.ctx.get_reg_temps().get(stack_idx) {
- assert!(!self.ctx.get_reg_temps().conflicts_with(stack_idx));
+ // Shuffle register moves, sometimes adding extra moves using SCRATCH_REG,
+ // so that they will not rewrite each other before they are used.
+ pub fn reorder_reg_moves(old_moves: &Vec<(Reg, Opnd)>) -> Vec<(Reg, Opnd)> {
+ // Return the index of a move whose destination is not used as a source if any.
+ fn find_safe_move(moves: &Vec<(Reg, Opnd)>) -> Option<usize> {
+ moves.iter().enumerate().find(|(_, &(dest_reg, _))| {
+ moves.iter().all(|&(_, src_opnd)| src_opnd != Opnd::Reg(dest_reg))
+ }).map(|(index, _)| index)
+ }
+
+ // Remove moves whose source and destination are the same
+ let mut old_moves: Vec<(Reg, Opnd)> = old_moves.clone().into_iter()
+ .filter(|&(reg, opnd)| Opnd::Reg(reg) != opnd).collect();
+
+ let mut new_moves = vec![];
+ while old_moves.len() > 0 {
+ // Keep taking safe moves
+ while let Some(index) = find_safe_move(&old_moves) {
+ new_moves.push(old_moves.remove(index));
+ }
+
+ // No safe move. Load the source of one move into SCRATCH_REG, and
+ // then load SCRATCH_REG into the destination when it's safe.
+ if old_moves.len() > 0 {
+ // Make sure it's safe to use SCRATCH_REG
+ assert!(old_moves.iter().all(|&(_, opnd)| opnd != Opnd::Reg(Assembler::SCRATCH_REG)));
+
+ // Move SCRATCH <- opnd, and delay reg <- SCRATCH
+ let (reg, opnd) = old_moves.remove(0);
+ new_moves.push((Assembler::SCRATCH_REG, opnd));
+ old_moves.push((reg, Opnd::Reg(Assembler::SCRATCH_REG)));
}
}
+ new_moves
}
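
Because this shuffling is easy to get wrong, here is a standalone model of the same two-phase algorithm with stand-in register names (not the backend's Reg or SCRATCH_REG): keep emitting any move whose destination no other pending move still reads, then break the remaining cycles through the scratch register.

    // Each move is (destination, source). A move is "safe" to emit when no
    // pending move still needs to read its destination.
    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    enum R { A, B, Scratch }

    fn reorder(mut moves: Vec<(R, R)>) -> Vec<(R, R)> {
        // Drop no-op moves first, like the pass above.
        moves.retain(|&(dst, src)| dst != src);
        let mut out = vec![];
        while !moves.is_empty() {
            // Phase 1: emit safe moves until none are left.
            while let Some(i) = moves.iter().position(|&(dst, _)| {
                moves.iter().all(|&(_, src)| src != dst)
            }) {
                out.push(moves.remove(i));
            }
            // Phase 2: a cycle remains (e.g. a swap); route one source
            // through the scratch register and retry.
            if !moves.is_empty() {
                assert!(moves.iter().all(|&(_, src)| src != R::Scratch));
                let (dst, src) = moves.remove(0);
                out.push((R::Scratch, src));
                moves.push((dst, R::Scratch));
            }
        }
        out
    }

    fn main() {
        // An A <-> B swap cannot be done with plain moves; the pass inserts Scratch.
        let moves = reorder(vec![(R::A, R::B), (R::B, R::A)]);
        assert_eq!(moves, vec![(R::Scratch, R::B), (R::B, R::A), (R::A, R::Scratch)]);
    }
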
/// Sets the out field on the various instructions that require allocated
@@ -1279,42 +1427,6 @@ impl Assembler
}
}
- // Reorder C argument moves, sometimes adding extra moves using SCRATCH_REG,
- // so that they will not rewrite each other before they are used.
- fn reorder_c_args(c_args: &Vec<(Reg, Opnd)>) -> Vec<(Reg, Opnd)> {
- // Return the index of a move whose destination is not used as a source if any.
- fn find_safe_arg(c_args: &Vec<(Reg, Opnd)>) -> Option<usize> {
- c_args.iter().enumerate().find(|(_, &(dest_reg, _))| {
- c_args.iter().all(|&(_, src_opnd)| src_opnd != Opnd::Reg(dest_reg))
- }).map(|(index, _)| index)
- }
-
- // Remove moves whose source and destination are the same
- let mut c_args: Vec<(Reg, Opnd)> = c_args.clone().into_iter()
- .filter(|&(reg, opnd)| Opnd::Reg(reg) != opnd).collect();
-
- let mut moves = vec![];
- while c_args.len() > 0 {
- // Keep taking safe moves
- while let Some(index) = find_safe_arg(&c_args) {
- moves.push(c_args.remove(index));
- }
-
- // No safe move. Load the source of one move into SCRATCH_REG, and
- // then load SCRATCH_REG into the destination when it's safe.
- if c_args.len() > 0 {
- // Make sure it's safe to use SCRATCH_REG
- assert!(c_args.iter().all(|&(_, opnd)| opnd != Opnd::Reg(Assembler::SCRATCH_REG)));
-
- // Move SCRATCH <- opnd, and delay reg <- SCRATCH
- let (reg, opnd) = c_args.remove(0);
- moves.push((Assembler::SCRATCH_REG, opnd));
- c_args.push((reg, Opnd::Reg(Assembler::SCRATCH_REG)));
- }
- }
- moves
- }
-
// Adjust the number of entries in live_ranges so that it can be indexed by mapped indexes.
fn shift_live_ranges(live_ranges: &mut Vec<usize>, start_index: usize, shift_offset: isize) {
if shift_offset >= 0 {
@@ -1359,7 +1471,7 @@ impl Assembler
let live_ranges: Vec<usize> = take(&mut self.live_ranges);
// shifted_live_ranges is indexed by mapped indexes in insn operands.
let mut shifted_live_ranges: Vec<usize> = live_ranges.clone();
- let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits));
+ let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits), self.num_locals);
let mut iterator = self.into_draining_iter();
while let Some((index, mut insn)) = iterator.next_mapped() {
@@ -1490,7 +1602,7 @@ impl Assembler
if c_args.len() > 0 {
// Resolve C argument dependencies
let c_args_len = c_args.len() as isize;
- let moves = reorder_c_args(&c_args.drain(..).into_iter().collect());
+ let moves = Self::reorder_reg_moves(&std::mem::take(&mut c_args));
shift_live_ranges(&mut shifted_live_ranges, asm.insns.len(), moves.len() as isize - c_args_len);
// Push batched C arguments
@@ -1513,13 +1625,10 @@ impl Assembler
#[must_use]
pub fn compile(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>) -> Option<(CodePtr, Vec<u32>)>
{
- #[cfg(feature = "disasm")]
let start_addr = cb.get_write_ptr();
-
let alloc_regs = Self::get_alloc_regs();
let ret = self.compile_with_regs(cb, ocb, alloc_regs);
- #[cfg(feature = "disasm")]
if let Some(dump_disasm) = get_option_ref!(dump_disasm) {
use crate::disasm::dump_disasm_addr_range;
let end_addr = cb.get_write_ptr();
@@ -1541,6 +1650,16 @@ impl Assembler
pub fn into_draining_iter(self) -> AssemblerDrainingIterator {
AssemblerDrainingIterator::new(self)
}
+
+ /// Return true if the next ccall() is expected to be leaf.
+ pub fn get_leaf_ccall(&mut self) -> bool {
+ self.leaf_ccall
+ }
+
+ /// Assert that the next ccall() is going to be leaf.
+ pub fn expect_leaf_ccall(&mut self) {
+ self.leaf_ccall = true;
+ }
}
/// A struct that allows iterating through an assembler's instructions and
@@ -1641,28 +1760,59 @@ impl Assembler {
}
pub fn ccall(&mut self, fptr: *const u8, opnds: Vec<Opnd>) -> Opnd {
- let old_temps = self.ctx.get_reg_temps(); // with registers
+ // Let vm_check_canary() assert this ccall's leafness if leaf_ccall is set
+ let canary_opnd = self.set_stack_canary(&opnds);
+
+ let old_temps = self.ctx.get_reg_mapping(); // with registers
// Spill stack temp registers since they are caller-saved registers.
// Note that this doesn't spill stack temps that are already popped
// but may still be used in the C arguments.
- self.spill_temps();
- let new_temps = self.ctx.get_reg_temps(); // all spilled
+ self.spill_regs();
+ let new_temps = self.ctx.get_reg_mapping(); // all spilled
- // Temporarily manipulate RegTemps so that we can use registers
+ // Temporarily manipulate RegMappings so that we can use registers
// to pass stack operands that are already spilled above.
- self.ctx.set_reg_temps(old_temps);
+ self.ctx.set_reg_mapping(old_temps);
// Call a C function
let out = self.next_opnd_out(Opnd::match_num_bits(&opnds));
self.push_insn(Insn::CCall { fptr, opnds, out });
// Registers in old_temps may be clobbered by the above C call,
- // so rollback the manipulated RegTemps to a spilled version.
- self.ctx.set_reg_temps(new_temps);
+ // so rollback the manipulated RegMappings to a spilled version.
+ self.ctx.set_reg_mapping(new_temps);
+
+ // Clear the canary after use
+ if let Some(canary_opnd) = canary_opnd {
+ self.mov(canary_opnd, 0.into());
+ }
out
}
+ /// Let vm_check_canary() assert the leafness of this ccall if leaf_ccall is set
+ fn set_stack_canary(&mut self, opnds: &Vec<Opnd>) -> Option<Opnd> {
+ // Use the slot right above the stack top for verifying leafness.
+ let canary_opnd = self.stack_opnd(-1);
+
+ // If the slot is already used, which is a valid optimization to avoid spills,
+ // give up the verification.
+ let canary_opnd = if cfg!(feature = "runtime_checks") && self.leaf_ccall && opnds.iter().all(|opnd|
+ opnd.get_reg_opnd() != canary_opnd.get_reg_opnd()
+ ) {
+ asm_comment!(self, "set stack canary");
+ self.mov(canary_opnd, vm_stack_canary().into());
+ Some(canary_opnd)
+ } else {
+ None
+ };
+
+ // Avoid carrying the flag to the next instruction whether we verified it or not.
+ self.leaf_ccall = false;
+
+ canary_opnd
+ }
+
pub fn cmp(&mut self, left: Opnd, right: Opnd) {
self.push_insn(Insn::Cmp { left, right });
}
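
The canary set by set_stack_canary() above is just a sentinel written into the unused slot right above the stack top, so that a ccall that was assumed to be leaf but touches the VM stack becomes detectable. The toy, standalone model below only illustrates that intent; the constant, struct and check are stand-ins, not CRuby's vm_stack_canary / vm_check_canary machinery:

    const STACK_CANARY: u64 = 0x5ca1ab1e;

    struct VmStack { slots: Vec<u64>, top: usize }

    impl VmStack {
        // What the generated code does before a ccall marked as leaf:
        // write the canary into the unused slot right above the stack top.
        fn set_canary(&mut self) { self.slots[self.top] = STACK_CANARY; }

        // What the generated code does after the ccall returns.
        fn clear_canary(&mut self) { self.slots[self.top] = 0; }

        // Toy stand-in for the VM-side check: if something grew the stack
        // during the call, the canary slot no longer holds the sentinel.
        fn assert_leaf(&self) {
            assert_eq!(self.slots[self.top], STACK_CANARY, "supposedly-leaf ccall used the VM stack");
        }
    }

    fn main() {
        let mut stack = VmStack { slots: vec![0; 8], top: 3 };
        stack.set_canary();
        // ... a genuinely leaf C function runs here and leaves slot 3 alone ...
        stack.assert_leaf();
        stack.clear_canary();
    }
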
@@ -1674,12 +1824,12 @@ impl Assembler {
out
}
- pub fn cpop_all(&mut self) {
+ pub fn cpop_all(&mut self, reg_mapping: RegMapping) {
self.push_insn(Insn::CPopAll);
- // Re-enable ccall's RegTemps assertion disabled by cpush_all.
+ // Re-enable ccall's RegMappings assertion disabled by cpush_all.
// cpush_all + cpop_all preserve all stack temp registers, so it's safe.
- self.set_reg_temps(self.ctx.get_reg_temps());
+ self.set_reg_mapping(reg_mapping);
}
pub fn cpop_into(&mut self, opnd: Opnd) {
@@ -1690,14 +1840,16 @@ impl Assembler {
self.push_insn(Insn::CPush(opnd));
}
- pub fn cpush_all(&mut self) {
+ pub fn cpush_all(&mut self) -> RegMapping {
self.push_insn(Insn::CPushAll);
// Mark all temps as not being in registers.
// Temps will be marked back as being in registers by cpop_all.
// We assume that cpush_all + cpop_all are used for C functions in utils.rs
- // that don't require spill_temps for GC.
- self.set_reg_temps(RegTemps::default());
+ // that don't require spill_regs for GC.
+ let mapping = self.ctx.get_reg_mapping();
+ self.set_reg_mapping(RegMapping::default());
+ mapping
}
pub fn cret(&mut self, opnd: Opnd) {
@@ -1793,6 +1945,11 @@ impl Assembler {
self.push_insn(Insn::Jg(target));
}
+ #[allow(dead_code)]
+ pub fn jge(&mut self, target: Target) {
+ self.push_insn(Insn::Jge(target));
+ }
+
pub fn jmp(&mut self, target: Target) {
self.push_insn(Insn::Jmp(target));
}
@@ -1934,6 +2091,16 @@ impl Assembler {
out
}
+ /// Verify the leafness of the given block
+ pub fn with_leaf_ccall<F, R>(&mut self, mut block: F) -> R
+ where F: FnMut(&mut Self) -> R {
+ let old_leaf_ccall = self.leaf_ccall;
+ self.leaf_ccall = true;
+ let ret = block(self);
+ self.leaf_ccall = old_leaf_ccall;
+ ret
+ }
+
/// Add a label at the current position
pub fn write_label(&mut self, target: Target) {
assert!(target.unwrap_label_idx() < self.label_names.len());
@@ -1949,10 +2116,10 @@ impl Assembler {
}
/// Macro to use format! for Insn::Comment, which skips a format! call
-/// when disasm is not supported.
+/// when not dumping disassembly.
macro_rules! asm_comment {
($asm:expr, $($fmt:tt)*) => {
- if cfg!(feature = "disasm") {
+ if $crate::options::get_option_ref!(dump_disasm).is_some() {
$asm.push_insn(Insn::Comment(format!($($fmt)*)));
}
};
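
The point of the macro change above is that the comment text is now only formatted when disassembly is actually being dumped, so the common case pays no string allocation. A standalone illustration of that pattern, with a plain atomic flag and a Vec standing in for the --yjit-dump-disasm option and Insn::Comment:

    use std::sync::atomic::{AtomicBool, Ordering};

    static DUMP_DISASM: AtomicBool = AtomicBool::new(false);

    macro_rules! asm_comment {
        ($comments:expr, $($fmt:tt)*) => {
            if DUMP_DISASM.load(Ordering::Relaxed) {
                $comments.push(format!($($fmt)*)); // format! is not evaluated otherwise
            }
        };
    }

    fn main() {
        let mut comments: Vec<String> = vec![];
        asm_comment!(comments, "spill_regs: {:?}", [1, 2, 3]); // skipped: flag is off
        DUMP_DISASM.store(true, Ordering::Relaxed);
        asm_comment!(comments, "reg_mapping: {:?}", (0, 1));   // recorded
        assert_eq!(comments, vec!["reg_mapping: (0, 1)".to_string()]);
    }
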
diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs
index 01e87fe26c..bfeea5163a 100644
--- a/yjit/src/backend/tests.rs
+++ b/yjit/src/backend/tests.rs
@@ -1,19 +1,19 @@
#![cfg(test)]
-use crate::asm::{CodeBlock};
+use crate::asm::CodeBlock;
use crate::backend::ir::*;
use crate::cruby::*;
use crate::utils::c_callable;
#[test]
fn test_add() {
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new(0);
let out = asm.add(SP, Opnd::UImm(1));
let _ = asm.add(out, Opnd::UImm(2));
}
#[test]
fn test_alloc_regs() {
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new(0);
// Get the first output that we're going to reuse later.
let out1 = asm.add(EC, Opnd::UImm(1));
@@ -62,7 +62,7 @@ fn test_alloc_regs() {
fn setup_asm() -> (Assembler, CodeBlock) {
return (
- Assembler::new(),
+ Assembler::new(0),
CodeBlock::new_dummy(1024)
);
}
@@ -194,7 +194,7 @@ fn test_c_call()
#[test]
fn test_alloc_ccall_regs() {
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new(0);
let out1 = asm.ccall(0 as *const u8, vec![]);
let out2 = asm.ccall(0 as *const u8, vec![out1]);
asm.mov(EC, out2);
@@ -232,9 +232,9 @@ fn test_jcc_ptr()
let (mut asm, mut cb) = setup_asm();
let side_exit = Target::CodePtr(cb.get_write_ptr().add_bytes(4));
- let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK));
+ let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK as i32));
asm.test(
- Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG),
+ Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG as i32),
not_mask,
);
asm.jnz(side_exit);
@@ -283,8 +283,7 @@ fn test_bake_string() {
#[test]
fn test_draining_iterator() {
-
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new(0);
let _ = asm.load(Opnd::None);
asm.store(Opnd::None, Opnd::None);
@@ -315,7 +314,7 @@ fn test_cmp_8_bit() {
fn test_no_pos_marker_callback_when_compile_fails() {
// When compilation fails (e.g. when out of memory), the code written out is malformed.
// We don't want to invoke the pos_marker callbacks with positions of malformed code.
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new(0);
// Markers around code to exhaust memory limit
let fail_if_called = |_code_ptr, _cb: &_| panic!("pos_marker callback should not be called");
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
index 25c92642d3..ef435bca7e 100644
--- a/yjit/src/backend/x86_64/mod.rs
+++ b/yjit/src/backend/x86_64/mod.rs
@@ -79,7 +79,7 @@ impl From<&Opnd> for X86Opnd {
}
}
-/// List of registers that can be used for stack temps.
+/// List of registers that can be used for stack temps and locals.
pub static TEMP_REGS: [Reg; 5] = [RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG];
impl Assembler
@@ -112,7 +112,7 @@ impl Assembler
fn x86_split(mut self) -> Assembler
{
let live_ranges: Vec<usize> = take(&mut self.live_ranges);
- let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits));
+ let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits), self.num_locals);
let mut iterator = self.into_draining_iter();
while let Some((index, mut insn)) = iterator.next_unmapped() {
@@ -181,6 +181,23 @@ impl Assembler
iterator.map_insn_index(&mut asm);
iterator.next_unmapped(); // Pop merged Insn::Mov
}
+ (Opnd::Reg(_), Opnd::Reg(_), Some(Insn::Mov { dest, src }))
+ if out == src && live_ranges[index] == index + 1 && {
+ // We want to do `dest == left`, but `left` has already gone
+ // through lower_stack_opnd() while `dest` has not. So we
+ // lower `dest` before comparing.
+ let lowered_dest = if let Opnd::Stack { .. } = dest {
+ asm.lower_stack_opnd(dest)
+ } else {
+ *dest
+ };
+ lowered_dest == *left
+ } => {
+ *out = *dest;
+ asm.push_insn(insn);
+ iterator.map_insn_index(&mut asm);
+ iterator.next_unmapped(); // Pop merged Insn::Mov
+ }
_ => {
match (unmapped_opnds[0], unmapped_opnds[1]) {
(Opnd::Mem(_), Opnd::Mem(_)) => {
@@ -271,7 +288,11 @@ impl Assembler
*truthy = asm.load(*truthy);
}
},
- Opnd::UImm(_) | Opnd::Imm(_) | Opnd::Value(_) => {
+ Opnd::UImm(_) | Opnd::Imm(_) => {
+ *truthy = asm.load(*truthy);
+ },
+ // Opnd::Value could have already been split
+ Opnd::Value(_) if !matches!(truthy, Opnd::InsnOut { .. }) => {
*truthy = asm.load(*truthy);
},
_ => {}
@@ -294,19 +315,24 @@ impl Assembler
let opnd1 = asm.load(*src);
asm.mov(*dest, opnd1);
},
- (Opnd::Mem(_), Opnd::UImm(value)) => {
- // 32-bit values will be sign-extended
- if imm_num_bits(*value as i64) > 32 {
+ (Opnd::Mem(Mem { num_bits, .. }), Opnd::UImm(value)) => {
+ // For 64 bit destinations, 32-bit values will be sign-extended
+ if *num_bits == 64 && imm_num_bits(*value as i64) > 32 {
let opnd1 = asm.load(*src);
asm.mov(*dest, opnd1);
} else {
asm.mov(*dest, *src);
}
},
- (Opnd::Mem(_), Opnd::Imm(value)) => {
- if imm_num_bits(*value) > 32 {
+ (Opnd::Mem(Mem { num_bits, .. }), Opnd::Imm(value)) => {
+ // For 64 bit destinations, 32-bit values will be sign-extended
+ if *num_bits == 64 && imm_num_bits(*value) > 32 {
let opnd1 = asm.load(*src);
asm.mov(*dest, opnd1);
+ } else if uimm_num_bits(*value as u64) <= *num_bits {
+ // If the bit string is short enough for the destination, use the unsigned representation.
+ // Note that 64-bit and negative values are ruled out.
+ asm.mov(*dest, Opnd::UImm(*value as u64));
} else {
asm.mov(*dest, *src);
}
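
The two immediate arms above choose between encoding the immediate directly and loading it into a register first. A standalone model of the signed-immediate (Opnd::Imm) decision, with simplified stand-ins for imm_num_bits()/uimm_num_bits(); the 0x80000001 case matches test_mov_m32_imm32 further down in this diff:

    // Smallest signed width that can hold the value.
    fn imm_num_bits(imm: i64) -> u8 {
        for bits in [8u8, 16, 32] {
            let min = -(1i64 << (bits - 1));
            let max = (1i64 << (bits - 1)) - 1;
            if min <= imm && imm <= max { return bits; }
        }
        64
    }

    // Smallest unsigned width that can hold the bit pattern.
    fn uimm_num_bits(uimm: u64) -> u8 {
        for bits in [8u8, 16, 32] {
            if uimm < (1u64 << bits) { return bits; }
        }
        64
    }

    #[derive(Debug, PartialEq)]
    enum MovStrategy { Direct, DirectUnsigned, LoadThenStore }

    // Decide how to store `imm` into a memory destination of `dest_bits` bits,
    // mirroring the branch order of the Opnd::Imm arm above.
    fn mov_mem_imm(dest_bits: u8, imm: i64) -> MovStrategy {
        if dest_bits == 64 && imm_num_bits(imm) > 32 {
            MovStrategy::LoadThenStore   // a direct store would be sign-extended incorrectly
        } else if uimm_num_bits(imm as u64) <= dest_bits {
            MovStrategy::DirectUnsigned  // the bit pattern fits the destination width
        } else {
            MovStrategy::Direct
        }
    }

    fn main() {
        // 0x80000001 needs 64 signed bits but fits a 32-bit destination as an
        // unsigned bit pattern, so it can be stored directly.
        assert_eq!(mov_mem_imm(32, 0x8000_0001), MovStrategy::DirectUnsigned);
        assert_eq!(mov_mem_imm(64, 0x8000_0001), MovStrategy::LoadThenStore);
    }
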
@@ -471,9 +497,7 @@ impl Assembler
match insn {
Insn::Comment(text) => {
- if cfg!(feature = "disasm") {
- cb.add_comment(text);
- }
+ cb.add_comment(text);
},
// Write the label at the current position
@@ -726,6 +750,14 @@ impl Assembler
}
},
+ Insn::Jge(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jge_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jge_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ },
+
Insn::Jbe(target) => {
match compile_side_exit(*target, self, ocb)? {
Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jbe_ptr(cb, code_ptr),
@@ -767,6 +799,8 @@ impl Assembler
}
}
+ Insn::Joz(..) | Insn::Jonz(..) => unreachable!("Joz/Jonz should be unused for now"),
+
// Atomically increment a counter at a given memory location
Insn::IncrCounter { mem, value } => {
assert!(matches!(mem, Opnd::Mem(_)));
@@ -866,14 +900,14 @@ impl Assembler
#[cfg(test)]
mod tests {
- use crate::disasm::{assert_disasm};
+ use crate::disasm::assert_disasm;
#[cfg(feature = "disasm")]
use crate::disasm::{unindent, disasm_addr_range};
use super::*;
fn setup_asm() -> (Assembler, CodeBlock) {
- (Assembler::new(), CodeBlock::new_dummy(1024))
+ (Assembler::new(0), CodeBlock::new_dummy(1024))
}
#[test]
@@ -1270,4 +1304,37 @@ mod tests {
0xe: mov qword ptr [rbx], rax
"});
}
+
+ #[test]
+ fn test_csel_split() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let stack_top = Opnd::mem(64, SP, 0);
+ let elem_opnd = asm.csel_ne(VALUE(0x7f22c88d1930).into(), Qnil.into());
+ asm.mov(stack_top, elem_opnd);
+
+ asm.compile_with_num_regs(&mut cb, 3);
+
+ assert_disasm!(cb, "48b830198dc8227f0000b904000000480f44c1488903", {"
+ 0x0: movabs rax, 0x7f22c88d1930
+ 0xa: mov ecx, 4
+ 0xf: cmove rax, rcx
+ 0x13: mov qword ptr [rbx], rax
+ "});
+ }
+
+ #[test]
+ fn test_mov_m32_imm32() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let shape_opnd = Opnd::mem(32, C_RET_OPND, 0);
+ asm.mov(shape_opnd, Opnd::UImm(0x8000_0001));
+ asm.mov(shape_opnd, Opnd::Imm(0x8000_0001));
+
+ asm.compile_with_num_regs(&mut cb, 0);
+ assert_disasm!(cb, "c70001000080c70001000080", {"
+ 0x0: mov dword ptr [rax], 0x80000001
+ 0x6: mov dword ptr [rax], 0x80000001
+ "});
+ }
}
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index 2c3e61356c..0fbca85716 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -3,6 +3,7 @@
use crate::asm::*;
use crate::backend::ir::*;
+use crate::backend::current::TEMP_REGS;
use crate::core::*;
use crate::cruby::*;
use crate::invariants::*;
@@ -16,6 +17,7 @@ use std::cell::Cell;
use std::cmp;
use std::cmp::min;
use std::collections::HashMap;
+use std::ffi::c_void;
use std::ffi::CStr;
use std::mem;
use std::os::raw::c_int;
@@ -37,14 +39,13 @@ enum CodegenStatus {
type InsnGenFn = fn(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus>;
/// Ephemeral code generation state.
-/// Represents a [core::Block] while we build it.
-pub struct JITState {
+/// Represents a [crate::core::Block] while we build it.
+pub struct JITState<'a> {
/// Instruction sequence for the compiling block
- iseq: IseqPtr,
+ pub iseq: IseqPtr,
/// The iseq index of the first instruction in the block
starting_insn_idx: IseqIdx,
@@ -71,6 +72,10 @@ pub struct JITState {
/// This allows us to peek at run-time values
ec: EcPtr,
+ /// The code block used for stubs, exits, and other code that is
+ /// not on the hot path.
+ outlined_code_block: &'a mut OutlinedCb,
+
/// The outgoing branches the block will have
pub pending_outgoing: Vec<PendingBranchRef>,
@@ -96,15 +101,33 @@ pub struct JITState {
/// not been written to for the block to be valid.
pub stable_constant_names_assumption: Option<*const ID>,
+ /// A list of classes that are not supposed to have a singleton class.
+ pub no_singleton_class_assumptions: Vec<VALUE>,
+
+ /// When true, the block is valid only when base pointer is equal to environment pointer.
+ pub no_ep_escape: bool,
+
/// When true, the block is valid only when there is a total of one ractor running
pub block_assumes_single_ractor: bool,
/// Address range for Linux perf's [JIT interface](https://github.com/torvalds/linux/blob/master/tools/perf/Documentation/jit-interface.txt)
perf_map: Rc::<RefCell::<Vec<(CodePtr, Option<CodePtr>, String)>>>,
+
+ /// Stack of symbol names for --yjit-perf
+ perf_stack: Vec<String>,
+
+ /// When true, this block is the first block compiled by gen_block_series().
+ first_block: bool,
+
+ /// A killswitch for bailing out of compilation. Used in rare situations where we need to fail
+ /// compilation deep in the stack (e.g. codegen failed for some jump target, but not due to
+ /// OOM). Because these situations are so rare, it's not worth it to check and propagate at each
+ /// site. Instead, we check this once at the end.
+ block_abandoned: bool,
}
-impl JITState {
- pub fn new(blockid: BlockId, starting_ctx: Context, output_ptr: CodePtr, ec: EcPtr) -> Self {
+impl<'a> JITState<'a> {
+ pub fn new(blockid: BlockId, starting_ctx: Context, output_ptr: CodePtr, ec: EcPtr, ocb: &'a mut OutlinedCb, first_block: bool) -> Self {
JITState {
iseq: blockid.iseq,
starting_insn_idx: blockid.idx,
@@ -116,13 +139,19 @@ impl JITState {
stack_size_for_pc: starting_ctx.get_stack_size(),
pending_outgoing: vec![],
ec,
+ outlined_code_block: ocb,
record_boundary_patch_point: false,
block_entry_exit: None,
method_lookup_assumptions: vec![],
bop_assumptions: vec![],
stable_constant_names_assumption: None,
+ no_singleton_class_assumptions: vec![],
+ no_ep_escape: false,
block_assumes_single_ractor: false,
perf_map: Rc::default(),
+ perf_stack: vec![],
+ first_block,
+ block_abandoned: false,
}
}
@@ -130,15 +159,15 @@ impl JITState {
self.insn_idx
}
- pub fn get_iseq(self: &JITState) -> IseqPtr {
+ pub fn get_iseq(&self) -> IseqPtr {
self.iseq
}
- pub fn get_opcode(self: &JITState) -> usize {
+ pub fn get_opcode(&self) -> usize {
self.opcode
}
- pub fn get_pc(self: &JITState) -> *mut VALUE {
+ pub fn get_pc(&self) -> *mut VALUE {
self.pc
}
@@ -161,14 +190,89 @@ impl JITState {
unsafe { *(self.pc.offset(arg_idx + 1)) }
}
+ /// Get [Self::outlined_code_block]
+ pub fn get_ocb(&mut self) -> &mut OutlinedCb {
+ self.outlined_code_block
+ }
+
+ /// Leave a code stub to re-enter the compiler at runtime when the compiling program point is
+ /// reached. Should always be used in tail position like `return jit.defer_compilation(asm);`.
+ #[must_use]
+ fn defer_compilation(&mut self, asm: &mut Assembler) -> Option<CodegenStatus> {
+ if crate::core::defer_compilation(self, asm).is_err() {
+ // If we can't leave a stub, the block isn't usable and we have to bail.
+ self.block_abandoned = true;
+ }
+ Some(EndBlock)
+ }
+
+ /// Generate a branch with either end possibly stubbed out
+ fn gen_branch(
+ &mut self,
+ asm: &mut Assembler,
+ target0: BlockId,
+ ctx0: &Context,
+ target1: Option<BlockId>,
+ ctx1: Option<&Context>,
+ gen_fn: BranchGenFn,
+ ) {
+ if crate::core::gen_branch(self, asm, target0, ctx0, target1, ctx1, gen_fn).is_none() {
+ // If we can't meet the request for a branch, the code is
+ // essentially corrupt and we have to discard the block.
+ self.block_abandoned = true;
+ }
+ }
+
+ /// Wrapper for [self::gen_outlined_exit] with error handling.
+ fn gen_outlined_exit(&mut self, exit_pc: *mut VALUE, ctx: &Context) -> Option<CodePtr> {
+ let result = gen_outlined_exit(exit_pc, self.num_locals(), ctx, self.get_ocb());
+ if result.is_none() {
+ // When we can't have the exits, the code is incomplete and we have to bail.
+ self.block_abandoned = true;
+ }
+
+ result
+ }
+
+ /// Return true if the current ISEQ could escape an environment.
+ ///
+ /// As of vm_push_frame(), EP is always equal to BP. However, after pushing
+ /// a frame, some ISEQ setups call vm_bind_update_env(), which redirects EP.
+ /// Also, some method calls escape the environment to the heap.
+ fn escapes_ep(&self) -> bool {
+ match unsafe { get_iseq_body_type(self.iseq) } {
+ // <main> frame is always associated to TOPLEVEL_BINDING.
+ ISEQ_TYPE_MAIN |
+ // Kernel#eval uses a heap EP when a Binding argument is not nil.
+ ISEQ_TYPE_EVAL => true,
+ // If this ISEQ has previously escaped EP, give up the optimization.
+ _ if iseq_escapes_ep(self.iseq) => true,
+ _ => false,
+ }
+ }
+
// Get the index of the next instruction
fn next_insn_idx(&self) -> u16 {
self.insn_idx + insn_len(self.get_opcode()) as u16
}
- // Check if we are compiling the instruction at the stub PC
+ /// Get the index of the instruction after the next instruction
+ fn next_next_insn_idx(&self) -> u16 {
+ let next_pc = unsafe { rb_iseq_pc_at_idx(self.iseq, self.next_insn_idx().into()) };
+ let next_opcode: usize = unsafe { rb_iseq_opcode_at_pc(self.iseq, next_pc) }.try_into().unwrap();
+ self.next_insn_idx() + insn_len(next_opcode) as u16
+ }
+
+ // Check if we are compiling the instruction at the stub PC with the target Context
// Meaning we are compiling the instruction that is next to execute
- pub fn at_current_insn(&self) -> bool {
+ pub fn at_compile_target(&self) -> bool {
+ // If this is not the first block compiled by gen_block_series(),
+ // it might be compiling the same block again with a different Context.
+ // In that case, it should defer_compilation() and inspect the stack there.
+ if !self.first_block {
+ return false;
+ }
+
let ec_pc: *mut VALUE = unsafe { get_cfp_pc(self.get_cfp()) };
ec_pc == self.pc
}
@@ -176,7 +280,7 @@ impl JITState {
// Peek at the nth topmost value on the Ruby stack.
// Returns the topmost value when n == 0.
pub fn peek_at_stack(&self, ctx: &Context, n: isize) -> VALUE {
- assert!(self.at_current_insn());
+ assert!(self.at_compile_target());
assert!(n < ctx.get_stack_size() as isize);
// Note: this does not account for ctx->sp_offset because
@@ -195,7 +299,7 @@ impl JITState {
}
fn peek_at_local(&self, n: i32) -> VALUE {
- assert!(self.at_current_insn());
+ assert!(self.at_compile_target());
let local_table_size: isize = unsafe { get_iseq_body_local_table_size(self.iseq) }
.try_into()
@@ -211,7 +315,7 @@ impl JITState {
}
fn peek_at_block_handler(&self, level: u32) -> VALUE {
- assert!(self.at_current_insn());
+ assert!(self.at_compile_target());
unsafe {
let ep = get_cfp_ep_level(self.get_cfp(), level);
@@ -219,19 +323,72 @@ impl JITState {
}
}
- pub fn assume_method_lookup_stable(&mut self, asm: &mut Assembler, ocb: &mut OutlinedCb, cme: CmePtr) -> Option<()> {
- jit_ensure_block_entry_exit(self, asm, ocb)?;
+ pub fn assume_expected_cfunc(
+ &mut self,
+ asm: &mut Assembler,
+ class: VALUE,
+ method: ID,
+ cfunc: *mut c_void,
+ ) -> bool {
+ let cme = unsafe { rb_callable_method_entry(class, method) };
+
+ if cme.is_null() {
+ return false;
+ }
+
+ let def_type = unsafe { get_cme_def_type(cme) };
+ if def_type != VM_METHOD_TYPE_CFUNC {
+ return false;
+ }
+ if unsafe { get_mct_func(get_cme_def_body_cfunc(cme)) } != cfunc {
+ return false;
+ }
+
+ self.assume_method_lookup_stable(asm, cme);
+
+ true
+ }
+
+ pub fn assume_method_lookup_stable(&mut self, asm: &mut Assembler, cme: CmePtr) -> Option<()> {
+ jit_ensure_block_entry_exit(self, asm)?;
self.method_lookup_assumptions.push(cme);
Some(())
}
+ /// Assume that objects of a given class will have no singleton class.
+ /// Return true if there has been no such singleton class since boot
+ /// and we can safely invalidate it.
+ pub fn assume_no_singleton_class(&mut self, asm: &mut Assembler, klass: VALUE) -> bool {
+ if jit_ensure_block_entry_exit(self, asm).is_none() {
+ return false; // out of space, give up
+ }
+ if has_singleton_class_of(klass) {
+ return false; // we've seen a singleton class. disable the optimization to avoid an invalidation loop.
+ }
+ self.no_singleton_class_assumptions.push(klass);
+ true
+ }
+
+ /// Assume that base pointer is equal to environment pointer in the current ISEQ.
+ /// Return true if it's safe to assume so.
+ fn assume_no_ep_escape(&mut self, asm: &mut Assembler) -> bool {
+ if jit_ensure_block_entry_exit(self, asm).is_none() {
+ return false; // out of space, give up
+ }
+ if self.escapes_ep() {
+ return false; // EP has been escaped in this ISEQ. disable the optimization to avoid an invalidation loop.
+ }
+ self.no_ep_escape = true;
+ true
+ }
+
fn get_cfp(&self) -> *mut rb_control_frame_struct {
unsafe { get_ec_cfp(self.ec) }
}
- pub fn assume_stable_constant_names(&mut self, asm: &mut Assembler, ocb: &mut OutlinedCb, id: *const ID) -> Option<()> {
- jit_ensure_block_entry_exit(self, asm, ocb)?;
+ pub fn assume_stable_constant_names(&mut self, asm: &mut Assembler, id: *const ID) -> Option<()> {
+ jit_ensure_block_entry_exit(self, asm)?;
self.stable_constant_names_assumption = Some(id);
Some(())
@@ -241,9 +398,27 @@ impl JITState {
self.pending_outgoing.push(branch)
}
+ /// Push a symbol for --yjit-perf
+ fn perf_symbol_push(&mut self, asm: &mut Assembler, symbol_name: &str) {
+ if !self.perf_stack.is_empty() {
+ self.perf_symbol_range_end(asm);
+ }
+ self.perf_stack.push(symbol_name.to_string());
+ self.perf_symbol_range_start(asm, symbol_name);
+ }
+
+ /// Pop the stack-top symbol for --yjit-perf
+ fn perf_symbol_pop(&mut self, asm: &mut Assembler) {
+ self.perf_symbol_range_end(asm);
+ self.perf_stack.pop();
+ if let Some(symbol_name) = self.perf_stack.get(0) {
+ self.perf_symbol_range_start(asm, symbol_name);
+ }
+ }
+
/// Mark the start address of a symbol to be reported to perf
fn perf_symbol_range_start(&self, asm: &mut Assembler, symbol_name: &str) {
- let symbol_name = symbol_name.to_string();
+ let symbol_name = format!("[JIT] {}", symbol_name);
let syms = self.perf_map.clone();
asm.pos_marker(move |start, _| syms.borrow_mut().push((start, None, symbol_name.clone())));
}
@@ -261,8 +436,9 @@ impl JITState {
/// Flush addresses and symbols to /tmp/perf-{pid}.map
fn flush_perf_symbols(&self, cb: &CodeBlock) {
+ assert_eq!(0, self.perf_stack.len());
let path = format!("/tmp/perf-{}.map", std::process::id());
- let mut f = std::fs::File::options().create(true).append(true).open(path).unwrap();
+ let mut f = std::io::BufWriter::new(std::fs::File::options().create(true).append(true).open(path).unwrap());
for sym in self.perf_map.borrow().iter() {
if let (start, Some(end), name) = sym {
// In case the code straddles two pages, part of it belongs to the symbol.
@@ -274,9 +450,61 @@ impl JITState {
}
}
}
+
+ /// Return true if we're compiling a send-like instruction, not an opt_* instruction.
+ pub fn is_sendish(&self) -> bool {
+ match unsafe { rb_iseq_opcode_at_pc(self.iseq, self.pc) } as u32 {
+ YARVINSN_send |
+ YARVINSN_opt_send_without_block |
+ YARVINSN_invokesuper => true,
+ _ => false,
+ }
+ }
+
+ /// Return the number of locals in the current ISEQ
+ pub fn num_locals(&self) -> u32 {
+ unsafe { get_iseq_body_local_table_size(self.iseq) }
+ }
+}
+
+/// Macro to call jit.perf_symbol_push() without evaluating arguments when
+/// the option is turned off, which is useful for avoiding string allocation.
+macro_rules! jit_perf_symbol_push {
+ ($jit:expr, $asm:expr, $symbol_name:expr, $perf_map:expr) => {
+ if get_option!(perf_map) == Some($perf_map) {
+ $jit.perf_symbol_push($asm, $symbol_name);
+ }
+ };
+}
+
+/// Macro to call jit.perf_symbol_pop(), for consistency with jit_perf_symbol_push!().
+macro_rules! jit_perf_symbol_pop {
+ ($jit:expr, $asm:expr, $perf_map:expr) => {
+ if get_option!(perf_map) == Some($perf_map) {
+ $jit.perf_symbol_pop($asm);
+ }
+ };
+}
+
+/// Macro to push and pop a perf symbol around a function call.
+macro_rules! perf_call {
+ // perf_call!("prefix: ", func(...)) uses "prefix: func" as a symbol.
+ ($prefix:expr, $func_name:ident($jit:expr, $asm:expr$(, $arg:expr)*$(,)?) ) => {
+ {
+ jit_perf_symbol_push!($jit, $asm, &format!("{}{}", $prefix, stringify!($func_name)), PerfMap::Codegen);
+ let ret = $func_name($jit, $asm, $($arg),*);
+ jit_perf_symbol_pop!($jit, $asm, PerfMap::Codegen);
+ ret
+ }
+ };
+ // perf_call! { func(...) } uses "func" as a symbol.
+ { $func_name:ident($jit:expr, $asm:expr$(, $arg:expr)*$(,)?) } => {
+ perf_call!("", $func_name($jit, $asm, $($arg),*))
+ };
}
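
The macros above wrap a codegen call between start/end perf symbols and only pay the formatting cost when --yjit-perf is enabled. The toy below models just that shape (the real macros thread jit/asm and a prefix; the recorder here is a plain Vec, not a perf map):

    struct Recorder { enabled: bool, events: Vec<String> }

    // Toy version of the wrap-and-stringify pattern: emit start/end markers
    // around the call, skipping the formatting when recording is disabled.
    macro_rules! perf_call {
        ($rec:expr, $func:ident($($arg:expr),*)) => {{
            if $rec.enabled { $rec.events.push(format!("start {}", stringify!($func))); }
            let ret = $func($($arg),*);
            if $rec.enabled { $rec.events.push(format!("end {}", stringify!($func))); }
            ret
        }};
    }

    fn gen_opt_plus(x: i64, y: i64) -> i64 { x + y }

    fn main() {
        let mut rec = Recorder { enabled: true, events: vec![] };
        let sum = perf_call!(rec, gen_opt_plus(1, 2));
        assert_eq!(sum, 3);
        assert_eq!(rec.events, vec!["start gen_opt_plus", "end gen_opt_plus"]);
    }
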
use crate::codegen::JCCKinds::*;
+use crate::log::Log;
#[allow(non_camel_case_types, unused)]
pub enum JCCKinds {
@@ -291,8 +519,31 @@ pub enum JCCKinds {
JCC_JO_MUL,
}
+/// Generate code to increment a given counter. With --yjit-trace-exits=counter,
+/// the counter is traced when it's incremented by this function.
#[inline(always)]
-fn gen_counter_incr(asm: &mut Assembler, counter: Counter) {
+fn gen_counter_incr(jit: &JITState, asm: &mut Assembler, counter: Counter) {
+ gen_counter_incr_with_pc(asm, counter, jit.pc);
+}
+
+/// Same as gen_counter_incr(), but takes a PC instead of JITState.
+#[inline(always)]
+fn gen_counter_incr_with_pc(asm: &mut Assembler, counter: Counter, pc: *mut VALUE) {
+ gen_counter_incr_without_pc(asm, counter);
+
+ // Trace a counter if --yjit-trace-exits=counter is given.
+ // TraceExits::All is handled by gen_exit().
+ if get_option!(trace_exits) == Some(TraceExits::Counter(counter)) {
+ with_caller_saved_temp_regs(asm, |asm| {
+ asm.ccall(rb_yjit_record_exit_stack as *const u8, vec![Opnd::const_ptr(pc as *const u8)]);
+ });
+ }
+}
+
+/// Generate code to increment a given counter. Not traced by --yjit-trace-exits=counter
+/// unlike gen_counter_incr() or gen_counter_incr_with_pc().
+#[inline(always)]
+fn gen_counter_incr_without_pc(asm: &mut Assembler, counter: Counter) {
// Assert that default counters are not incremented by generated code as this would impact performance
assert!(!DEFAULT_COUNTERS.contains(&counter), "gen_counter_incr incremented {:?}", counter);
@@ -332,7 +583,7 @@ fn gen_save_sp(asm: &mut Assembler) {
fn gen_save_sp_with_offset(asm: &mut Assembler, offset: i8) {
if asm.ctx.get_sp_offset() != -offset {
asm_comment!(asm, "save SP to CFP");
- let stack_pointer = asm.ctx.sp_opnd((offset as i32 * SIZEOF_VALUE_I32) as isize);
+ let stack_pointer = asm.ctx.sp_opnd(offset as i32);
let sp_addr = asm.lea(stack_pointer);
asm.mov(SP, sp_addr);
let cfp_sp_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP);
@@ -341,22 +592,87 @@ fn gen_save_sp_with_offset(asm: &mut Assembler, offset: i8) {
}
}
-/// jit_save_pc() + gen_save_sp(). Should be used before calling a routine that
-/// could:
+/// Basically jit_prepare_non_leaf_call(), but this registers the current PC
+/// to lazily push a C method frame when it's necessary.
+fn jit_prepare_lazy_frame_call(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ cme: *const rb_callable_method_entry_t,
+ recv_opnd: YARVOpnd,
+) -> bool {
+ // We can use this only when the receiver is on stack.
+ let recv_idx = match recv_opnd {
+ StackOpnd(recv_idx) => recv_idx,
+ _ => unreachable!("recv_opnd must be on stack, but got: {:?}", recv_opnd),
+ };
+
+ // Get the next PC. jit_save_pc() saves that PC.
+ let pc: *mut VALUE = unsafe {
+ let cur_insn_len = insn_len(jit.get_opcode()) as isize;
+ jit.get_pc().offset(cur_insn_len)
+ };
+
+ let pc_to_cfunc = CodegenGlobals::get_pc_to_cfunc();
+ match pc_to_cfunc.get(&pc) {
+ Some(&(other_cme, _)) if other_cme != cme => {
+ // Bail out if it's not the only cme on this callsite.
+ incr_counter!(lazy_frame_failure);
+ return false;
+ }
+ _ => {
+ // Let rb_yjit_lazy_push_frame() lazily push a C frame on this PC.
+ incr_counter!(lazy_frame_count);
+ pc_to_cfunc.insert(pc, (cme, recv_idx));
+ }
+ }
+
+ // Save the PC to trigger a lazy frame push, and save the SP to get the receiver.
+ // The C func may call a method that doesn't raise, so prepare for invalidation too.
+ jit_prepare_non_leaf_call(jit, asm);
+
+ // Make sure we're ready for calling rb_vm_push_cfunc_frame().
+ let cfunc_argc = unsafe { get_mct_argc(get_cme_def_body_cfunc(cme)) };
+ if cfunc_argc != -1 {
+ assert_eq!(recv_idx as i32, cfunc_argc); // verify the receiver index if possible
+ }
+ assert!(asm.get_leaf_ccall()); // It checks the stack canary we set for known_cfunc_codegen.
+
+ true
+}
+
+/// jit_save_pc() + gen_save_sp(). Should be used before calling a routine that could:
/// - Perform GC allocation
/// - Take the VM lock through RB_VM_LOCK_ENTER()
/// - Perform Ruby method call
-fn jit_prepare_routine_call(
+///
+/// If the routine doesn't call arbitrary methods, use jit_prepare_call_with_gc() instead.
+fn jit_prepare_non_leaf_call(
jit: &mut JITState,
asm: &mut Assembler
) {
- jit.record_boundary_patch_point = true;
- jit_save_pc(jit, asm);
- gen_save_sp(asm);
+ // Prepare for GC. Setting PC also prepares for showing a backtrace.
+ jit.record_boundary_patch_point = true; // VM lock could trigger invalidation
+ jit_save_pc(jit, asm); // for allocation tracing
+ gen_save_sp(asm); // protect objects from GC
// In case the routine calls Ruby methods, it can set local variables
- // through Kernel#binding and other means.
- asm.ctx.clear_local_types();
+ // through Kernel#binding, rb_debug_inspector API, and other means.
+ asm.clear_local_types();
+}
+
+/// jit_save_pc() + gen_save_sp(). Should be used before calling a routine that could:
+/// - Perform GC allocation
+/// - Take the VM lock through RB_VM_LOCK_ENTER()
+fn jit_prepare_call_with_gc(
+ jit: &mut JITState,
+ asm: &mut Assembler
+) {
+ jit.record_boundary_patch_point = true; // VM lock could trigger invalidation
+ jit_save_pc(jit, asm); // for allocation tracing
+ gen_save_sp(asm); // protect objects from GC
+
+ // Expect a leaf ccall(). Use jit_prepare_non_leaf_call() otherwise.
+ asm.expect_leaf_ccall();
}
/// Record the current codeblock write position for rewriting into a jump into
@@ -379,14 +695,36 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
unsafe { CStr::from_ptr(rb_obj_info(val)).to_str().unwrap() }
}
+ // Some types such as CString only assert the class field of the object
+ // when there has never been a singleton class created for objects of that class.
+ // Once a singleton class has been created, they become their weaker
+ // `T*` variant, and more objects should pass the verification.
+ fn relax_type_with_singleton_class_assumption(ty: Type) -> Type {
+ if let Type::CString | Type::CArray | Type::CHash = ty {
+ if has_singleton_class_of(ty.known_class().unwrap()) {
+ match ty {
+ Type::CString => return Type::TString,
+ Type::CArray => return Type::TArray,
+ Type::CHash => return Type::THash,
+ _ => (),
+ }
+ }
+ }
+
+ ty
+ }
+
// Only able to check types when at current insn
- assert!(jit.at_current_insn());
+ assert!(jit.at_compile_target());
let self_val = jit.peek_at_self();
let self_val_type = Type::from(self_val);
+ let learned_self_type = ctx.get_opnd_type(SelfOpnd);
+ let learned_self_type = relax_type_with_singleton_class_assumption(learned_self_type);
+
// Verify self operand type
- if self_val_type.diff(ctx.get_opnd_type(SelfOpnd)) == TypeDiff::Incompatible {
+ if self_val_type.diff(learned_self_type) == TypeDiff::Incompatible {
panic!(
"verify_ctx: ctx self type ({:?}) incompatible with actual value of self {}",
ctx.get_opnd_type(SelfOpnd),
@@ -395,16 +733,17 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
}
// Verify stack operand types
- let top_idx = cmp::min(ctx.get_stack_size(), MAX_TEMP_TYPES as u8);
+ let top_idx = cmp::min(ctx.get_stack_size(), MAX_CTX_TEMPS as u8);
for i in 0..top_idx {
let learned_mapping = ctx.get_opnd_mapping(StackOpnd(i));
let learned_type = ctx.get_opnd_type(StackOpnd(i));
+ let learned_type = relax_type_with_singleton_class_assumption(learned_type);
let stack_val = jit.peek_at_stack(ctx, i as isize);
let val_type = Type::from(stack_val);
- match learned_mapping.get_kind() {
- TempMappingKind::MapToSelf => {
+ match learned_mapping {
+ TempMapping::MapToSelf => {
if self_val != stack_val {
panic!(
"verify_ctx: stack value was mapped to self, but values did not match!\n stack: {}\n self: {}",
@@ -413,8 +752,7 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
);
}
}
- TempMappingKind::MapToLocal => {
- let local_idx: u8 = learned_mapping.get_local_idx();
+ TempMapping::MapToLocal(local_idx) => {
let local_val = jit.peek_at_local(local_idx.into());
if local_val != stack_val {
panic!(
@@ -425,7 +763,7 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
);
}
}
- TempMappingKind::MapToStack => {}
+ TempMapping::MapToStack(_) => {}
}
// If the actual type differs from the learned type
@@ -441,9 +779,10 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
// Verify local variable types
let local_table_size = unsafe { get_iseq_body_local_table_size(jit.iseq) };
- let top_idx: usize = cmp::min(local_table_size as usize, MAX_TEMP_TYPES);
+ let top_idx: usize = cmp::min(local_table_size as usize, MAX_CTX_TEMPS);
for i in 0..top_idx {
let learned_type = ctx.get_local_type(i);
+ let learned_type = relax_type_with_singleton_class_assumption(learned_type);
let local_val = jit.peek_at_local(i as i32);
let local_type = Type::from(local_val);
@@ -464,9 +803,9 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
// interpreter state.
fn gen_stub_exit(ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new_without_iseq();
- gen_counter_incr(&mut asm, Counter::exit_from_branch_stub);
+ gen_counter_incr_without_pc(&mut asm, Counter::exit_from_branch_stub);
asm_comment!(asm, "exit from branch stub");
asm.cpop_into(SP);
@@ -482,11 +821,11 @@ fn gen_stub_exit(ocb: &mut OutlinedCb) -> Option<CodePtr> {
/// Generate an exit to return to the interpreter
fn gen_exit(exit_pc: *mut VALUE, asm: &mut Assembler) {
- #[cfg(all(feature = "disasm", not(test)))]
- {
+ #[cfg(not(test))]
+ asm_comment!(asm, "exit to interpreter on {}", {
let opcode = unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) };
- asm_comment!(asm, "exit to interpreter on {}", insn_name(opcode as usize));
- }
+ insn_name(opcode as usize)
+ });
if asm.ctx.is_return_landing() {
asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP));
@@ -495,7 +834,7 @@ fn gen_exit(exit_pc: *mut VALUE, asm: &mut Assembler) {
}
// Spill stack temps before returning to the interpreter
- asm.spill_temps();
+ asm.spill_regs();
// Generate the code to exit to the interpreter
// Write the adjusted SP back into the CFP
@@ -520,9 +859,9 @@ fn gen_exit(exit_pc: *mut VALUE, asm: &mut Assembler) {
vec![Opnd::const_ptr(exit_pc as *const u8)]
);
- // If --yjit-trace-exits option is enabled, record the exit stack
- // while recording the side exits.
- if get_option!(gen_trace_exits) {
+ // If --yjit-trace-exits is enabled, record the exit stack while recording
+ // the side exits. TraceExits::Counter is handled by gen_counted_exit().
+ if get_option!(trace_exits) == Some(TraceExits::All) {
asm.ccall(
rb_yjit_record_exit_stack as *const u8,
vec![Opnd::const_ptr(exit_pc as *const u8)]
@@ -551,11 +890,15 @@ fn gen_exit(exit_pc: *mut VALUE, asm: &mut Assembler) {
/// moment, so there is one unique side exit for each context. Note that
/// it's incorrect to jump to the side exit after any ctx stack push operations
/// since they change the logic required for reconstructing interpreter state.
-pub fn gen_outlined_exit(exit_pc: *mut VALUE, ctx: &Context, ocb: &mut OutlinedCb) -> Option<CodePtr> {
+///
+/// If you're in [the codegen module][self], use [JITState::gen_outlined_exit]
+/// instead of calling this directly.
+#[must_use]
+pub fn gen_outlined_exit(exit_pc: *mut VALUE, num_locals: u32, ctx: &Context, ocb: &mut OutlinedCb) -> Option<CodePtr> {
let mut cb = ocb.unwrap();
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new(num_locals);
asm.ctx = *ctx;
- asm.set_reg_temps(ctx.get_reg_temps());
+ asm.set_reg_mapping(ctx.get_reg_mapping());
gen_exit(exit_pc, &mut asm);
@@ -563,7 +906,7 @@ pub fn gen_outlined_exit(exit_pc: *mut VALUE, ctx: &Context, ocb: &mut OutlinedC
}
/// Get a side exit. Increment a counter in it if --yjit-stats is enabled.
-pub fn gen_counted_exit(side_exit: CodePtr, ocb: &mut OutlinedCb, counter: Option<Counter>) -> Option<CodePtr> {
+pub fn gen_counted_exit(exit_pc: *mut VALUE, side_exit: CodePtr, ocb: &mut OutlinedCb, counter: Option<Counter>) -> Option<CodePtr> {
// The counter is only incremented when stats are enabled
if !get_option!(gen_stats) {
return Some(side_exit);
@@ -573,15 +916,10 @@ pub fn gen_counted_exit(side_exit: CodePtr, ocb: &mut OutlinedCb, counter: Optio
None => return Some(side_exit),
};
- let mut asm = Assembler::new();
-
- // Load the pointer into a register
- asm_comment!(asm, "increment counter {}", counter.get_name());
- let ptr_reg = asm.load(Opnd::const_ptr(get_counter_ptr(&counter.get_name()) as *const u8));
- let counter_opnd = Opnd::mem(64, ptr_reg, 0);
+ let mut asm = Assembler::new_without_iseq();
- // Increment and store the updated value
- asm.incr_counter(counter_opnd, Opnd::UImm(1));
+ // Increment a counter
+ gen_counter_incr_with_pc(&mut asm, counter, exit_pc);
// Jump to the existing side exit
asm.jmp(Target::CodePtr(side_exit));
@@ -590,10 +928,22 @@ pub fn gen_counted_exit(side_exit: CodePtr, ocb: &mut OutlinedCb, counter: Optio
asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
}
+/// Preserve caller-saved stack temp registers during the call of a given block
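+/// A sketch of the intended usage (illustrative; `some_non_leaf_helper` and `recv_opnd`
+/// are hypothetical names, not taken from this patch):
+///     let ret = with_caller_saved_temp_regs(asm, |asm| {
+///         asm.ccall(some_non_leaf_helper as *const u8, vec![recv_opnd])
+///     });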
+fn with_caller_saved_temp_regs<F, R>(asm: &mut Assembler, block: F) -> R where F: FnOnce(&mut Assembler) -> R {
+ for &reg in caller_saved_temp_regs() {
+ asm.cpush(Opnd::Reg(reg)); // save stack temps
+ }
+ let ret = block(asm);
+ for &reg in caller_saved_temp_regs().rev() {
+ asm.cpop_into(Opnd::Reg(reg)); // restore stack temps
+ }
+ ret
+}
+
// Ensure that there is an exit for the start of the block being compiled.
// Block invalidation uses this exit.
#[must_use]
-pub fn jit_ensure_block_entry_exit(jit: &mut JITState, asm: &mut Assembler, ocb: &mut OutlinedCb) -> Option<()> {
+pub fn jit_ensure_block_entry_exit(jit: &mut JITState, asm: &mut Assembler) -> Option<()> {
if jit.block_entry_exit.is_some() {
return Some(());
}
@@ -604,11 +954,11 @@ pub fn jit_ensure_block_entry_exit(jit: &mut JITState, asm: &mut Assembler, ocb:
if jit.insn_idx == jit.starting_insn_idx {
// Generate the exit with the cache in Assembler.
let side_exit_context = SideExitContext::new(jit.pc, *block_starting_context);
- let entry_exit = asm.get_side_exit(&side_exit_context, None, ocb);
+ let entry_exit = asm.get_side_exit(&side_exit_context, None, jit.get_ocb());
jit.block_entry_exit = Some(entry_exit?);
} else {
let block_entry_pc = unsafe { rb_iseq_pc_at_idx(jit.iseq, jit.starting_insn_idx.into()) };
- jit.block_entry_exit = Some(gen_outlined_exit(block_entry_pc, block_starting_context, ocb)?);
+ jit.block_entry_exit = Some(jit.gen_outlined_exit(block_entry_pc, block_starting_context)?);
}
Some(())
@@ -617,7 +967,7 @@ pub fn jit_ensure_block_entry_exit(jit: &mut JITState, asm: &mut Assembler, ocb:
// Landing code for when c_return tracing is enabled. See full_cfunc_return().
fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new_without_iseq();
// This chunk of code expects REG_EC to be filled properly and
// RAX to contain the return value of the C method.
@@ -629,7 +979,7 @@ fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> Option<CodePtr> {
);
// Count the exit
- gen_counter_incr(&mut asm, Counter::traced_cfunc_return);
+ gen_counter_incr_without_pc(&mut asm, Counter::traced_cfunc_return);
// Return to the interpreter
asm.cpop_into(SP);
@@ -647,14 +997,14 @@ fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> Option<CodePtr> {
/// This is used by gen_leave() and gen_entry_prologue()
fn gen_leave_exit(ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new_without_iseq();
// gen_leave() fully reconstructs interpreter state and leaves the
// return value in C_RET_OPND before coming here.
let ret_opnd = asm.live_reg_opnd(C_RET_OPND);
// Every exit to the interpreter should be counted
- gen_counter_incr(&mut asm, Counter::leave_interp_return);
+ gen_counter_incr_without_pc(&mut asm, Counter::leave_interp_return);
asm_comment!(asm, "exit from leave");
asm.cpop_into(SP);
@@ -674,13 +1024,13 @@ fn gen_leave_exit(ocb: &mut OutlinedCb) -> Option<CodePtr> {
// the caller's stack, which is different from gen_stub_exit().
fn gen_leave_exception(ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new_without_iseq();
// gen_leave() leaves the return value in C_RET_OPND before coming here.
let ruby_ret_val = asm.live_reg_opnd(C_RET_OPND);
// Every exit to the interpreter should be counted
- gen_counter_incr(&mut asm, Counter::leave_interp_return);
+ gen_counter_incr_without_pc(&mut asm, Counter::leave_interp_return);
asm_comment!(asm, "push return value through cfp->sp");
let cfp_sp = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP);
@@ -711,14 +1061,13 @@ fn gen_leave_exception(ocb: &mut OutlinedCb) -> Option<CodePtr> {
pub fn gen_entry_chain_guard(
asm: &mut Assembler,
ocb: &mut OutlinedCb,
- iseq: IseqPtr,
- insn_idx: u16,
+ blockid: BlockId,
) -> Option<PendingEntryRef> {
let entry = new_pending_entry();
let stub_addr = gen_entry_stub(entry.uninit_entry.as_ptr() as usize, ocb)?;
let pc_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC);
- let expected_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx.into()) };
+ let expected_pc = unsafe { rb_iseq_pc_at_idx(blockid.iseq, blockid.idx.into()) };
let expected_pc_opnd = Opnd::const_ptr(expected_pc as *const u8);
asm_comment!(asm, "guard expected PC");
@@ -733,22 +1082,19 @@ pub fn gen_entry_chain_guard(
/// Compile an interpreter entry block to be inserted into an iseq
/// Returns None if compilation fails.
/// If jit_exception is true, compile JIT code for handling exceptions.
-/// See [jit_compile_exception] for details.
+/// See jit_compile_exception() for details.
pub fn gen_entry_prologue(
cb: &mut CodeBlock,
ocb: &mut OutlinedCb,
- iseq: IseqPtr,
- insn_idx: u16,
+ blockid: BlockId,
+ stack_size: u8,
jit_exception: bool,
-) -> Option<CodePtr> {
+) -> Option<(CodePtr, RegMapping)> {
+ let iseq = blockid.iseq;
let code_ptr = cb.get_write_ptr();
- let mut asm = Assembler::new();
- if get_option_ref!(dump_disasm).is_some() {
- asm_comment!(asm, "YJIT entry point: {}", iseq_get_location(iseq, 0));
- } else {
- asm_comment!(asm, "YJIT entry");
- }
+ let mut asm = Assembler::new(unsafe { get_iseq_body_local_table_size(iseq) });
+ asm_comment!(asm, "YJIT entry point: {}", iseq_get_location(iseq, 0));
asm.frame_setup();
@@ -795,10 +1141,11 @@ pub fn gen_entry_prologue(
// If they don't match, then we'll jump to an entry stub and generate
// another PC check and entry there.
let pending_entry = if unsafe { get_iseq_flags_has_opt(iseq) } || jit_exception {
- Some(gen_entry_chain_guard(&mut asm, ocb, iseq, insn_idx)?)
+ Some(gen_entry_chain_guard(&mut asm, ocb, blockid)?)
} else {
None
};
+ let reg_mapping = gen_entry_reg_mapping(&mut asm, blockid, stack_size);
asm.compile(cb, Some(ocb))?;
@@ -816,8 +1163,37 @@ pub fn gen_entry_prologue(
.ok().expect("PendingEntry should be unique");
iseq_payload.entries.push(pending_entry.into_entry());
}
- Some(code_ptr)
+ Some((code_ptr, reg_mapping))
+ }
+}
+
+/// Generate code to load registers for a JIT entry. When the entry block is compiled for
+/// the first time, no registers are loaded. When the block has already been compiled as a
+/// callee block, this loads the registers that block expects so it can be reused.
+pub fn gen_entry_reg_mapping(asm: &mut Assembler, blockid: BlockId, stack_size: u8) -> RegMapping {
+ // Find an existing callee block. If it's not found or uses no register, skip loading registers.
+ let mut ctx = Context::default();
+ ctx.set_stack_size(stack_size);
+ let reg_mapping = find_most_compatible_reg_mapping(blockid, &ctx).unwrap_or(RegMapping::default());
+ if reg_mapping == RegMapping::default() {
+ return reg_mapping;
+ }
+
+ // If found, load the same registers to reuse the block.
+ asm_comment!(asm, "reuse maps: {:?}", reg_mapping);
+ let local_table_size: u32 = unsafe { get_iseq_body_local_table_size(blockid.iseq) }.try_into().unwrap();
+ for &reg_opnd in reg_mapping.get_reg_opnds().iter() {
+ match reg_opnd {
+ RegOpnd::Local(local_idx) => {
+ let loaded_reg = TEMP_REGS[reg_mapping.get_reg(reg_opnd).unwrap()];
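+ // Convert the 0-based local table index into the EP-relative offset that local_opnd()
+ // expects: locals sit below the VM_ENV_DATA_SIZE environment data slots that EP points into.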
+ let loaded_temp = asm.local_opnd(local_table_size - local_idx as u32 + VM_ENV_DATA_SIZE - 1);
+ asm.load_into(Opnd::Reg(loaded_reg), loaded_temp);
+ }
+ RegOpnd::Stack(_) => unreachable!("find_most_compatible_reg_mapping should not leave {:?}", reg_opnd),
+ }
}
+
+ reg_mapping
}
// Generate code to check for interrupts and take a side-exit.
@@ -832,7 +1208,7 @@ fn gen_check_ints(
// Not checking interrupt_mask since it's zero outside finalize_deferred_heap_pages,
// signal_exec, or rb_postponed_job_flush.
- let interrupt_flag = asm.load(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG));
+ let interrupt_flag = asm.load(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG as i32));
asm.test(interrupt_flag, interrupt_flag);
asm.jnz(Target::side_exit(counter));
@@ -843,29 +1219,36 @@ fn gen_check_ints(
fn jump_to_next_insn(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
-) -> Option<()> {
- // Reset the depth since in current usages we only ever jump to to
+) -> Option<CodegenStatus> {
+ end_block_with_jump(jit, asm, jit.next_insn_idx())
+}
+
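+/// Finish the current block by jumping to the given instruction index in the same iseq,
+/// recording a boundary patch point first if the previous instruction requested one.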
+fn end_block_with_jump(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ continuation_insn_idx: u16,
+) -> Option<CodegenStatus> {
+ // Reset the depth since in current usages we only ever jump to
// chain_depth > 0 from the same instruction.
let mut reset_depth = asm.ctx;
- reset_depth.reset_chain_depth();
+ reset_depth.reset_chain_depth_and_defer();
let jump_block = BlockId {
iseq: jit.iseq,
- idx: jit.next_insn_idx(),
+ idx: continuation_insn_idx,
};
// We are at the end of the current instruction. Record the boundary.
if jit.record_boundary_patch_point {
jit.record_boundary_patch_point = false;
- let exit_pc = unsafe { jit.pc.offset(insn_len(jit.opcode).try_into().unwrap()) };
- let exit_pos = gen_outlined_exit(exit_pc, &reset_depth, ocb);
+ let exit_pc = unsafe { rb_iseq_pc_at_idx(jit.iseq, continuation_insn_idx.into())};
+ let exit_pos = jit.gen_outlined_exit(exit_pc, &reset_depth);
record_global_inval_patch(asm, exit_pos?);
}
// Generate the jump instruction
gen_direct_jump(jit, &reset_depth, jump_block, asm);
- Some(())
+ Some(EndBlock)
}
// Compile a sequence of bytecode instructions for a given basic block version.
@@ -878,6 +1261,7 @@ pub fn gen_single_block(
ec: EcPtr,
cb: &mut CodeBlock,
ocb: &mut OutlinedCb,
+ first_block: bool,
) -> Result<BlockRef, ()> {
// Limit the number of specialized versions for this block
let ctx = limit_block_versions(blockid, start_ctx);
@@ -901,33 +1285,24 @@ pub fn gen_single_block(
let mut insn_idx: IseqIdx = blockid.idx;
// Initialize a JIT state object
- let mut jit = JITState::new(blockid, ctx, cb.get_write_ptr(), ec);
+ let mut jit = JITState::new(blockid, ctx, cb.get_write_ptr(), ec, ocb, first_block);
jit.iseq = blockid.iseq;
// Create a backend assembler instance
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new(jit.num_locals());
asm.ctx = ctx;
- #[cfg(feature = "disasm")]
if get_option_ref!(dump_disasm).is_some() {
let blockid_idx = blockid.idx;
let chain_depth = if asm.ctx.get_chain_depth() > 0 { format!("(chain_depth: {})", asm.ctx.get_chain_depth()) } else { "".to_string() };
asm_comment!(asm, "Block: {} {}", iseq_get_location(blockid.iseq, blockid_idx), chain_depth);
- asm_comment!(asm, "reg_temps: {:08b}", asm.ctx.get_reg_temps().as_u8());
+ asm_comment!(asm, "reg_mapping: {:?}", asm.ctx.get_reg_mapping());
}
- // Mark the start of a method name symbol for --yjit-perf
- if get_option!(perf_map) {
- let comptime_recv_class = jit.peek_at_self().class_of();
- let class_name = unsafe { cstr_to_rust_string(rb_class2name(comptime_recv_class)) };
- match (class_name, unsafe { rb_iseq_label(iseq) }) {
- (Some(class_name), iseq_label) if iseq_label != Qnil => {
- let iseq_label = ruby_str_to_rust(iseq_label);
- jit.perf_symbol_range_start(&mut asm, &format!("[JIT] {}#{}", class_name, iseq_label));
- }
- _ => {},
- }
- }
+ Log::add_block_with_chain_depth(blockid, asm.ctx.get_chain_depth());
+
+ // Mark the start of an ISEQ for --yjit-perf
+ jit_perf_symbol_push!(jit, &mut asm, &get_iseq_name(iseq), PerfMap::ISEQ);
if asm.ctx.is_return_landing() {
// Continuation of the end of gen_leave().
@@ -955,7 +1330,7 @@ pub fn gen_single_block(
// if we run into it. This is necessary because we want to invalidate based on the
// instruction's index.
if opcode == YARVINSN_opt_getconstant_path.as_usize() && insn_idx > jit.starting_insn_idx {
- jump_to_next_insn(&mut jit, &mut asm, ocb);
+ jump_to_next_insn(&mut jit, &mut asm);
break;
}
@@ -968,27 +1343,27 @@ pub fn gen_single_block(
// stack_pop doesn't immediately deallocate a register for stack temps,
// but it's safe to do so at this instruction boundary.
- for stack_idx in asm.ctx.get_stack_size()..MAX_REG_TEMPS {
- asm.ctx.dealloc_temp_reg(stack_idx);
+ for stack_idx in asm.ctx.get_stack_size()..MAX_CTX_TEMPS as u8 {
+ asm.ctx.dealloc_reg(RegOpnd::Stack(stack_idx));
}
// If previous instruction requested to record the boundary
if jit.record_boundary_patch_point {
// Generate an exit to this instruction and record it
- let exit_pos = gen_outlined_exit(jit.pc, &asm.ctx, ocb).ok_or(())?;
+ let exit_pos = jit.gen_outlined_exit(jit.pc, &asm.ctx).ok_or(())?;
record_global_inval_patch(&mut asm, exit_pos);
jit.record_boundary_patch_point = false;
}
// In debug mode, verify our existing assumption
- if cfg!(debug_assertions) && get_option!(verify_ctx) && jit.at_current_insn() {
+ if cfg!(debug_assertions) && get_option!(verify_ctx) && jit.at_compile_target() {
verify_ctx(&jit, &asm.ctx);
}
// :count-placement:
// Count bytecode instructions that execute in generated code.
// Note that the increment happens even when the output takes side exit.
- gen_counter_incr(&mut asm, Counter::yjit_insns_count);
+ gen_counter_incr(&jit, &mut asm, Counter::yjit_insns_count);
// Lookup the codegen function for this instruction
let mut status = None;
@@ -1003,7 +1378,12 @@ pub fn gen_single_block(
}
// Call the code generation function
- status = gen_fn(&mut jit, &mut asm, ocb);
+ jit_perf_symbol_push!(jit, &mut asm, &insn_name(opcode), PerfMap::Codegen);
+ status = gen_fn(&mut jit, &mut asm);
+ jit_perf_symbol_pop!(jit, &mut asm, PerfMap::Codegen);
+
+ #[cfg(debug_assertions)]
+ assert!(!asm.get_leaf_ccall(), "ccall() wasn't used after leaf_ccall was set in {}", insn_name(opcode));
}
// If we can't compile this instruction
@@ -1029,7 +1409,7 @@ pub fn gen_single_block(
// For now, reset the chain depth after each instruction as only the
// first instruction in the block can concern itself with the depth.
- asm.ctx.reset_chain_depth();
+ asm.ctx.reset_chain_depth_and_defer();
// Move to the next instruction to compile
insn_idx += insn_len(opcode) as u16;
@@ -1045,27 +1425,31 @@ pub fn gen_single_block(
// doesn't go to the next instruction in the same iseq.
assert!(!jit.record_boundary_patch_point);
+ // Bail when requested to.
+ if jit.block_abandoned {
+ incr_counter!(abandoned_block_count);
+ return Err(());
+ }
+
// Pad the block if it has the potential to be invalidated
if jit.block_entry_exit.is_some() {
asm.pad_inval_patch();
}
- // Mark the end of a method name symbol for --yjit-perf
- if get_option!(perf_map) {
- jit.perf_symbol_range_end(&mut asm);
- }
+ // Mark the end of an ISEQ for --yjit-perf
+ jit_perf_symbol_pop!(jit, &mut asm, PerfMap::ISEQ);
// Compile code into the code block
- let (_, gc_offsets) = asm.compile(cb, Some(ocb)).ok_or(())?;
+ let (_, gc_offsets) = asm.compile(cb, Some(jit.get_ocb())).ok_or(())?;
let end_addr = cb.get_write_ptr();
// Flush perf symbols after asm.compile() writes addresses
- if get_option!(perf_map) {
+ if get_option!(perf_map).is_some() {
jit.flush_perf_symbols(cb);
}
// If code for the block doesn't fit, fail
- if cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes() {
+ if cb.has_dropped_bytes() || jit.get_ocb().unwrap().has_dropped_bytes() {
return Err(());
}
@@ -1076,7 +1460,6 @@ pub fn gen_single_block(
fn gen_nop(
_jit: &mut JITState,
_asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Do nothing
Some(KeepCompiling)
@@ -1085,7 +1468,6 @@ fn gen_nop(
fn gen_pop(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Decrement SP
asm.stack_pop(1);
@@ -1095,7 +1477,6 @@ fn gen_pop(
fn gen_dup(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let dup_val = asm.stack_opnd(0);
let mapping = asm.ctx.get_opnd_mapping(dup_val.into());
@@ -1110,7 +1491,6 @@ fn gen_dup(
fn gen_dupn(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let n = jit.get_arg(0).as_usize();
@@ -1134,11 +1514,22 @@ fn gen_dupn(
Some(KeepCompiling)
}
+// Reverse top X stack entries
+fn gen_opt_reverse(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+) -> Option<CodegenStatus> {
+ let count = jit.get_arg(0).as_i32();
+ for n in 0..(count/2) {
+ stack_swap(asm, n, count - 1 - n);
+ }
+ Some(KeepCompiling)
+}
+
// Swap top 2 stack entries
fn gen_swap(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
stack_swap(asm, 0, 1);
Some(KeepCompiling)
@@ -1146,11 +1537,11 @@ fn gen_swap(
fn stack_swap(
asm: &mut Assembler,
- offset0: u16,
- offset1: u16,
+ offset0: i32,
+ offset1: i32,
) {
- let stack0_mem = asm.stack_opnd(offset0 as i32);
- let stack1_mem = asm.stack_opnd(offset1 as i32);
+ let stack0_mem = asm.stack_opnd(offset0);
+ let stack1_mem = asm.stack_opnd(offset1);
let mapping0 = asm.ctx.get_opnd_mapping(stack0_mem.into());
let mapping1 = asm.ctx.get_opnd_mapping(stack1_mem.into());
@@ -1167,7 +1558,6 @@ fn stack_swap(
fn gen_putnil(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
jit_putobject(asm, Qnil);
Some(KeepCompiling)
@@ -1182,7 +1572,6 @@ fn jit_putobject(asm: &mut Assembler, arg: VALUE) {
fn gen_putobject_int2fix(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let opcode = jit.opcode;
let cst_val: usize = if opcode == YARVINSN_putobject_INT2FIX_0_.as_usize() {
@@ -1190,26 +1579,85 @@ fn gen_putobject_int2fix(
} else {
1
};
+ let cst_val = VALUE::fixnum_from_usize(cst_val);
+
+ if let Some(result) = fuse_putobject_opt_ltlt(jit, asm, cst_val) {
+ return Some(result);
+ }
- jit_putobject(asm, VALUE::fixnum_from_usize(cst_val));
+ jit_putobject(asm, cst_val);
Some(KeepCompiling)
}
fn gen_putobject(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let arg: VALUE = jit.get_arg(0);
+ if let Some(result) = fuse_putobject_opt_ltlt(jit, asm, arg) {
+ return Some(result);
+ }
+
jit_putobject(asm, arg);
Some(KeepCompiling)
}
+/// Combine `putobject` and `opt_ltlt` together if profitable, for example when
+/// left shifting an integer by a constant amount.
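+/// For example, `x << 2` compiles to `putobject 2` followed by `opt_ltlt`, which this fuses
+/// into a single guarded fixnum shift instead of a full method dispatch.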
+fn fuse_putobject_opt_ltlt(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ constant_object: VALUE,
+) -> Option<CodegenStatus> {
+ let next_opcode = unsafe { rb_vm_insn_addr2opcode(jit.pc.add(insn_len(jit.opcode).as_usize()).read().as_ptr()) };
+ if next_opcode == YARVINSN_opt_ltlt as i32 && constant_object.fixnum_p() {
+ // Untag the fixnum shift amount
+ let shift_amt = constant_object.as_isize() >> 1;
+ if shift_amt > 63 || shift_amt < 0 {
+ return None;
+ }
+ if !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
+ }
+
+ let lhs = jit.peek_at_stack(&asm.ctx, 0);
+ if !lhs.fixnum_p() {
+ return None;
+ }
+
+ if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, BOP_LTLT) {
+ return None;
+ }
+
+ asm_comment!(asm, "integer left shift with rhs={shift_amt}");
+ let lhs = asm.stack_opnd(0);
+
+ // Guard that lhs is a fixnum if necessary
+ let lhs_type = asm.ctx.get_opnd_type(lhs.into());
+ if lhs_type != Type::Fixnum {
+ asm_comment!(asm, "guard arg0 fixnum");
+ asm.test(lhs, Opnd::UImm(RUBY_FIXNUM_FLAG as u64));
+
+ jit_chain_guard(
+ JCC_JZ,
+ jit,
+ asm,
+ SEND_MAX_DEPTH,
+ Counter::guard_send_not_fixnums,
+ );
+ }
+
+ asm.stack_pop(1);
+ fixnum_left_shift_body(asm, lhs, shift_amt as u64);
+ return end_block_with_jump(jit, asm, jit.next_next_insn_idx());
+ }
+ return None;
+}
+
fn gen_putself(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Write it on the stack
@@ -1225,7 +1673,6 @@ fn gen_putself(
fn gen_putspecialobject(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let object_type = jit.get_arg(0).as_usize();
@@ -1245,7 +1692,6 @@ fn gen_putspecialobject(
fn gen_setn(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let n = jit.get_arg(0).as_usize();
@@ -1266,7 +1712,6 @@ fn gen_setn(
fn gen_topn(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let n = jit.get_arg(0).as_usize();
@@ -1282,7 +1727,6 @@ fn gen_topn(
fn gen_adjuststack(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let n = jit.get_arg(0).as_usize();
asm.stack_pop(n);
@@ -1292,23 +1736,21 @@ fn gen_adjuststack(
fn gen_opt_plus(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) {
Some(two_fixnums) => two_fixnums,
None => {
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ return jit.defer_compilation(asm);
}
};
if two_fixnums {
- if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS) {
+ if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS) {
return None;
}
// Check that both operands are fixnums
- guard_two_fixnums(jit, asm, ocb);
+ guard_two_fixnums(jit, asm);
// Get the operands from the stack
let arg1 = asm.stack_pop(1);
@@ -1325,7 +1767,7 @@ fn gen_opt_plus(
Some(KeepCompiling)
} else {
- gen_opt_send_without_block(jit, asm, ocb)
+ gen_opt_send_without_block(jit, asm)
}
}
@@ -1333,20 +1775,18 @@ fn gen_opt_plus(
fn gen_newarray(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let n = jit.get_arg(0).as_u32();
// Save the PC and SP because we are allocating
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_call_with_gc(jit, asm);
// If n is 0, then elts is never going to be read, so we can just pass null
let values_ptr = if n == 0 {
Opnd::UImm(0)
} else {
asm_comment!(asm, "load pointer to array elements");
- let offset_magnitude = (SIZEOF_VALUE as u32) * n;
- let values_opnd = asm.ctx.sp_opnd(-(offset_magnitude as isize));
+ let values_opnd = asm.ctx.sp_opnd(-(n as i32));
asm.lea(values_opnd)
};
@@ -1361,7 +1801,7 @@ fn gen_newarray(
);
asm.stack_pop(n.as_usize());
- let stack_ret = asm.stack_push(Type::TArray);
+ let stack_ret = asm.stack_push(Type::CArray);
asm.mov(stack_ret, new_ary);
Some(KeepCompiling)
@@ -1371,12 +1811,11 @@ fn gen_newarray(
fn gen_duparray(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let ary = jit.get_arg(0);
// Save the PC and SP because we are allocating
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_call_with_gc(jit, asm);
// call rb_ary_resurrect(VALUE ary);
let new_ary = asm.ccall(
@@ -1384,7 +1823,7 @@ fn gen_duparray(
vec![ary.into()],
);
- let stack_ret = asm.stack_push(Type::TArray);
+ let stack_ret = asm.stack_push(Type::CArray);
asm.mov(stack_ret, new_ary);
Some(KeepCompiling)
@@ -1394,17 +1833,16 @@ fn gen_duparray(
fn gen_duphash(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let hash = jit.get_arg(0);
// Save the PC and SP because we are allocating
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_call_with_gc(jit, asm);
// call rb_hash_resurrect(VALUE hash);
let hash = asm.ccall(rb_hash_resurrect as *const u8, vec![hash.into()]);
- let stack_ret = asm.stack_push(Type::Hash);
+ let stack_ret = asm.stack_push(Type::CHash);
asm.mov(stack_ret, hash);
Some(KeepCompiling)
@@ -1414,13 +1852,12 @@ fn gen_duphash(
fn gen_splatarray(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let flag = jit.get_arg(0).as_usize();
- // Save the PC and SP because the callee may allocate
+ // Save the PC and SP because the callee may call #to_a
// Note that this modifies REG_SP, which is why we do it first
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
// Get the operands from the stack
let ary_opnd = asm.stack_opnd(0);
@@ -1435,15 +1872,76 @@ fn gen_splatarray(
Some(KeepCompiling)
}
+// call to_hash on hash to keyword splat before converting block
+// e.g. foo(**object, &block)
+fn gen_splatkw(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+) -> Option<CodegenStatus> {
+ // Defer compilation so we can specialize on a runtime hash operand
+ if !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
+ }
+
+ let comptime_hash = jit.peek_at_stack(&asm.ctx, 1);
+ if comptime_hash.hash_p() {
+ // If a compile-time hash operand is T_HASH, just guard that it's T_HASH.
+ let hash_opnd = asm.stack_opnd(1);
+ guard_object_is_hash(asm, hash_opnd, hash_opnd.into(), Counter::splatkw_not_hash);
+ } else if comptime_hash.nil_p() {
+ // Speculate we'll see nil if compile-time hash operand is nil
+ let hash_opnd = asm.stack_opnd(1);
+ let hash_opnd_type = asm.ctx.get_opnd_type(hash_opnd.into());
+
+ if hash_opnd_type != Type::Nil {
+ asm.cmp(hash_opnd, Qnil.into());
+ asm.jne(Target::side_exit(Counter::splatkw_not_nil));
+
+ if Type::Nil.diff(hash_opnd_type) != TypeDiff::Incompatible {
+ asm.ctx.upgrade_opnd_type(hash_opnd.into(), Type::Nil);
+ }
+ }
+ } else {
+ // Otherwise, call #to_hash on the operand if it's not nil.
+
+ // Save the PC and SP because the callee may call #to_hash
+ jit_prepare_non_leaf_call(jit, asm);
+
+ // Get the operands from the stack
+ let block_opnd = asm.stack_opnd(0);
+ let block_type = asm.ctx.get_opnd_type(block_opnd.into());
+ let hash_opnd = asm.stack_opnd(1);
+
+ c_callable! {
+ fn to_hash_if_not_nil(mut obj: VALUE) -> VALUE {
+ if obj != Qnil {
+ obj = unsafe { rb_to_hash_type(obj) };
+ }
+ obj
+ }
+ }
+
+ let hash = asm.ccall(to_hash_if_not_nil as _, vec![hash_opnd]);
+ asm.stack_pop(2); // Keep it on stack during ccall for GC
+
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, hash);
+ asm.stack_push(block_type);
+ // Leave block_opnd spilled by ccall as is
+ asm.ctx.dealloc_reg(RegOpnd::Stack(asm.ctx.get_stack_size() - 1));
+ }
+
+ Some(KeepCompiling)
+}
+
// concat two arrays
fn gen_concatarray(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
- // Save the PC and SP because the callee may allocate
+ // Save the PC and SP because the callee may call #to_a
// Note that this modifies REG_SP, which is why we do it first
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
// Get the operands from the stack
let ary2st_opnd = asm.stack_opnd(0);
@@ -1459,16 +1957,61 @@ fn gen_concatarray(
Some(KeepCompiling)
}
+// concat second array to first array.
+// first argument must already be an array.
+// attempts to convert second object to array using to_a.
+fn gen_concattoarray(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+) -> Option<CodegenStatus> {
+ // Save the PC and SP because the callee may call #to_a
+ jit_prepare_non_leaf_call(jit, asm);
+
+ // Get the operands from the stack
+ let ary2_opnd = asm.stack_opnd(0);
+ let ary1_opnd = asm.stack_opnd(1);
+
+ let ary = asm.ccall(rb_vm_concat_to_array as *const u8, vec![ary1_opnd, ary2_opnd]);
+ asm.stack_pop(2); // Keep them on stack during ccall for GC
+
+ let stack_ret = asm.stack_push(Type::TArray);
+ asm.mov(stack_ret, ary);
+
+ Some(KeepCompiling)
+}
+
+// push the given number of objects onto the array directly below them on the stack.
+fn gen_pushtoarray(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+) -> Option<CodegenStatus> {
+ let num = jit.get_arg(0).as_u64();
+
+ // Save the PC and SP because the callee may allocate
+ jit_prepare_call_with_gc(jit, asm);
+
+ // Get the operands from the stack
+ let ary_opnd = asm.stack_opnd(num as i32);
+ let objp_opnd = asm.lea(asm.ctx.sp_opnd(-(num as i32)));
+
+ let ary = asm.ccall(rb_ary_cat as *const u8, vec![ary_opnd, objp_opnd, num.into()]);
+ asm.stack_pop(num as usize + 1); // Keep it on stack during ccall for GC
+
+ let stack_ret = asm.stack_push(Type::TArray);
+ asm.mov(stack_ret, ary);
+
+ Some(KeepCompiling)
+}
+
// new range initialized from top 2 values
fn gen_newrange(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let flag = jit.get_arg(0).as_usize();
// rb_range_new() allocates and can raise
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
// val = rb_range_new(low, high, (int)flag);
let range_opnd = asm.ccall(
@@ -1540,11 +2083,83 @@ fn guard_object_is_array(
asm.cmp(flags_opnd, (RUBY_T_ARRAY as u64).into());
asm.jne(Target::side_exit(counter));
- if Type::UnknownHeap.diff(object_type) != TypeDiff::Incompatible {
+ if Type::TArray.diff(object_type) != TypeDiff::Incompatible {
asm.ctx.upgrade_opnd_type(object_opnd, Type::TArray);
}
}
+fn guard_object_is_hash(
+ asm: &mut Assembler,
+ object: Opnd,
+ object_opnd: YARVOpnd,
+ counter: Counter,
+) {
+ let object_type = asm.ctx.get_opnd_type(object_opnd);
+ if object_type.is_hash() {
+ return;
+ }
+
+ let object_reg = match object {
+ Opnd::InsnOut { .. } => object,
+ _ => asm.load(object),
+ };
+ guard_object_is_heap(asm, object_reg, object_opnd, counter);
+
+ asm_comment!(asm, "guard object is hash");
+
+ // Pull out the type mask
+ let flags_opnd = Opnd::mem(VALUE_BITS, object_reg, RUBY_OFFSET_RBASIC_FLAGS);
+ let flags_opnd = asm.and(flags_opnd, (RUBY_T_MASK as u64).into());
+
+ // Compare the result with T_HASH
+ asm.cmp(flags_opnd, (RUBY_T_HASH as u64).into());
+ asm.jne(Target::side_exit(counter));
+
+ if Type::THash.diff(object_type) != TypeDiff::Incompatible {
+ asm.ctx.upgrade_opnd_type(object_opnd, Type::THash);
+ }
+}
+
+fn guard_object_is_fixnum(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ object: Opnd,
+ object_opnd: YARVOpnd
+) {
+ let object_type = asm.ctx.get_opnd_type(object_opnd);
+ if object_type.is_heap() {
+ asm_comment!(asm, "arg is heap object");
+ asm.jmp(Target::side_exit(Counter::guard_send_not_fixnum));
+ return;
+ }
+
+ if object_type != Type::Fixnum && object_type.is_specific() {
+ asm_comment!(asm, "arg is not fixnum");
+ asm.jmp(Target::side_exit(Counter::guard_send_not_fixnum));
+ return;
+ }
+
+ assert!(!object_type.is_heap());
+ assert!(object_type == Type::Fixnum || object_type.is_unknown());
+
+ // If not fixnums at run-time, fall back
+ if object_type != Type::Fixnum {
+ asm_comment!(asm, "guard object fixnum");
+ asm.test(object, Opnd::UImm(RUBY_FIXNUM_FLAG as u64));
+
+ jit_chain_guard(
+ JCC_JZ,
+ jit,
+ asm,
+ SEND_MAX_DEPTH,
+ Counter::guard_send_not_fixnum,
+ );
+ }
+
+ // Set the stack type in the context.
+ asm.ctx.upgrade_opnd_type(object.into(), Type::Fixnum);
+}
+
fn guard_object_is_string(
asm: &mut Assembler,
object: Opnd,
@@ -1572,7 +2187,7 @@ fn guard_object_is_string(
asm.cmp(flags_reg, Opnd::UImm(RUBY_T_STRING as u64));
asm.jne(Target::side_exit(counter));
- if Type::UnknownHeap.diff(object_type) != TypeDiff::Incompatible {
+ if Type::TString.diff(object_type) != TypeDiff::Incompatible {
asm.ctx.upgrade_opnd_type(object_opnd, Type::TString);
}
}
@@ -1617,7 +2232,6 @@ fn guard_object_is_not_ruby2_keyword_hash(
fn gen_expandarray(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Both arguments are rb_num_t which is unsigned
let num = jit.get_arg(0).as_u32();
@@ -1625,27 +2239,27 @@ fn gen_expandarray(
// If this instruction has the splat flag, then bail out.
if flag & 0x01 != 0 {
- gen_counter_incr(asm, Counter::expandarray_splat);
+ gen_counter_incr(jit, asm, Counter::expandarray_splat);
return None;
}
// If this instruction has the postarg flag, then bail out.
if flag & 0x02 != 0 {
- gen_counter_incr(asm, Counter::expandarray_postarg);
+ gen_counter_incr(jit, asm, Counter::expandarray_postarg);
return None;
}
let array_opnd = asm.stack_opnd(0);
// Defer compilation so we can specialize on the runtime array operand
- if !jit.at_current_insn() {
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ if !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
}
let comptime_recv = jit.peek_at_stack(&asm.ctx, 0);
- // If the comptime receiver is not an array
+ // If the comptime receiver is not an array, speculate that the `rb_check_array_type()`
+ // conversion returns nil and has no side effects (e.g. no arbitrary method calls).
if !unsafe { RB_TYPE_P(comptime_recv, RUBY_T_ARRAY) } {
// at compile time, ensure to_ary is not defined
let target_cme = unsafe { rb_callable_method_entry_or_negative(comptime_recv.class_of(), ID!(to_ary)) };
@@ -1653,18 +2267,23 @@ fn gen_expandarray(
// if to_ary is defined, return can't compile so to_ary can be called
if cme_def_type != VM_METHOD_TYPE_UNDEF {
- gen_counter_incr(asm, Counter::expandarray_to_ary);
+ gen_counter_incr(jit, asm, Counter::expandarray_to_ary);
+ return None;
+ }
+
+ // Bail when method_missing is defined to avoid generating code to call it.
+ // Also, for simplicity, bail when BasicObject#method_missing has been removed.
+ if !assume_method_basic_definition(jit, asm, comptime_recv.class_of(), ID!(method_missing)) {
+ gen_counter_incr(jit, asm, Counter::expandarray_method_missing);
return None;
}
// invalidate compile block if to_ary is later defined
- jit.assume_method_lookup_stable(asm, ocb, target_cme);
+ jit.assume_method_lookup_stable(asm, target_cme);
jit_guard_known_klass(
jit,
asm,
- ocb,
- comptime_recv.class_of(),
array_opnd,
array_opnd.into(),
comptime_recv,
@@ -1694,7 +2313,7 @@ fn gen_expandarray(
}
// Get the compile-time array length
- let comptime_len = unsafe { rb_yjit_array_len(comptime_recv) as u32 };
+ let comptime_len = unsafe { rb_jit_array_len(comptime_recv) as u32 };
// Move the array from the stack and check that it's an array.
guard_object_is_array(
@@ -1722,7 +2341,6 @@ fn gen_expandarray(
JCC_JB,
jit,
asm,
- ocb,
EXPANDARRAY_MAX_CHAIN_DEPTH,
Counter::expandarray_chain_max_depth,
);
@@ -1734,7 +2352,6 @@ fn gen_expandarray(
JCC_JNE,
jit,
asm,
- ocb,
EXPANDARRAY_MAX_CHAIN_DEPTH,
Counter::expandarray_chain_max_depth,
);
@@ -1810,7 +2427,7 @@ fn gen_get_ep(asm: &mut Assembler, level: u32) -> Opnd {
// Gets the EP of the ISeq of the containing method, or "local level".
// Equivalent of GET_LEP() macro.
-fn gen_get_lep(jit: &mut JITState, asm: &mut Assembler) -> Opnd {
+fn gen_get_lep(jit: &JITState, asm: &mut Assembler) -> Opnd {
// Equivalent of get_lvar_level() in compile.c
fn get_lvar_level(iseq: IseqPtr) -> u32 {
if iseq == unsafe { rb_get_iseq_body_local_iseq(iseq) } {
@@ -1830,13 +2447,32 @@ fn gen_getlocal_generic(
ep_offset: u32,
level: u32,
) -> Option<CodegenStatus> {
- // Load environment pointer EP (level 0) from CFP
- let ep_opnd = gen_get_ep(asm, level);
+ // Split the block if we need to invalidate this instruction when EP escapes
+ if level == 0 && !jit.escapes_ep() && !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
+ }
- // Load the local from the block
- // val = *(vm_get_ep(GET_EP(), level) - idx);
- let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32);
- let local_opnd = Opnd::mem(64, ep_opnd, offs);
+ let local_opnd = if level == 0 && jit.assume_no_ep_escape(asm) {
+ // Load the local using SP register
+ asm.local_opnd(ep_offset)
+ } else {
+ // Load environment pointer EP (level 0) from CFP
+ let ep_opnd = gen_get_ep(asm, level);
+
+ // Load the local from the block
+ // val = *(vm_get_ep(GET_EP(), level) - idx);
+ let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32);
+ let local_opnd = Opnd::mem(64, ep_opnd, offs);
+
+ // Write back an argument register to the stack. If the local variable
+ // is an argument, it might have an allocated register, but if this ISEQ
+ // is known to escape EP, the register shouldn't be used after this getlocal.
+ if level == 0 && asm.ctx.get_reg_mapping().get_reg(asm.local_opnd(ep_offset).reg_opnd()).is_some() {
+ asm.mov(local_opnd, asm.local_opnd(ep_offset));
+ }
+
+ local_opnd
+ };
// Write the local at SP
let stack_top = if level == 0 {
@@ -1854,7 +2490,6 @@ fn gen_getlocal_generic(
fn gen_getlocal(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let idx = jit.get_arg(0).as_u32();
let level = jit.get_arg(1).as_u32();
@@ -1864,7 +2499,6 @@ fn gen_getlocal(
fn gen_getlocal_wc0(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let idx = jit.get_arg(0).as_u32();
gen_getlocal_generic(jit, asm, idx, 0)
@@ -1873,7 +2507,6 @@ fn gen_getlocal_wc0(
fn gen_getlocal_wc1(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let idx = jit.get_arg(0).as_u32();
gen_getlocal_generic(jit, asm, idx, 1)
@@ -1882,26 +2515,21 @@ fn gen_getlocal_wc1(
fn gen_setlocal_generic(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
ep_offset: u32,
level: u32,
) -> Option<CodegenStatus> {
+ // Post condition: The type of the set local is updated in the Context.
let value_type = asm.ctx.get_opnd_type(StackOpnd(0));
- // Load environment pointer EP at level
- let ep_opnd = gen_get_ep(asm, level);
-
// Fallback because of write barrier
- if asm.ctx.get_chain_depth() > 0
- {
- // Save the PC and SP because it runs GC
- jit_prepare_routine_call(jit, asm);
-
- // Pop the value to write from the stack
- let value_opnd = asm.stack_opnd(0);
+ if asm.ctx.get_chain_depth() > 0 {
+ // Load environment pointer EP at level
+ let ep_opnd = gen_get_ep(asm, level);
+ // This function should not yield to the GC.
// void rb_vm_env_write(const VALUE *ep, int index, VALUE v)
let index = -(ep_offset as i64);
+ let value_opnd = asm.stack_opnd(0);
asm.ccall(
rb_vm_env_write as *const u8,
vec![
@@ -1910,21 +2538,52 @@ fn gen_setlocal_generic(
value_opnd,
]
);
- asm.stack_pop(1); // Keep it on stack during ccall for GC
+ asm.stack_pop(1);
+ // Set local type in the context
+ if level == 0 {
+ let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset).as_usize();
+ asm.ctx.set_local_type(local_idx, value_type);
+ }
return Some(KeepCompiling);
}
- // Write barriers may be required when VM_ENV_FLAG_WB_REQUIRED is set, however write barriers
- // only affect heap objects being written. If we know an immediate value is being written we
- // can skip this check.
- if !value_type.is_imm() {
- // flags & VM_ENV_FLAG_WB_REQUIRED
+ // Split the block if we need to invalidate this instruction when EP escapes
+ if level == 0 && !jit.escapes_ep() && !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
+ }
+
+ let (flags_opnd, local_opnd) = if level == 0 && jit.assume_no_ep_escape(asm) {
+ // Load flags and the local using SP register
+ let flags_opnd = asm.ctx.ep_opnd(VM_ENV_DATA_INDEX_FLAGS as i32);
+ let local_opnd = asm.local_opnd(ep_offset);
+
+ // Allocate a register to the new local operand
+ asm.alloc_reg(local_opnd.reg_opnd());
+ (flags_opnd, local_opnd)
+ } else {
+ // Make sure getlocal doesn't read a stale register. If the local variable
+ // is an argument, it might have an allocated register, but if this ISEQ
+ // is known to escape EP, the register shouldn't be used after this setlocal.
+ if level == 0 {
+ asm.ctx.dealloc_reg(asm.local_opnd(ep_offset).reg_opnd());
+ }
+
+ // Load flags and the local for the level
+ let ep_opnd = gen_get_ep(asm, level);
let flags_opnd = Opnd::mem(
64,
ep_opnd,
SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_FLAGS as i32,
);
+ (flags_opnd, Opnd::mem(64, ep_opnd, -SIZEOF_VALUE_I32 * ep_offset as i32))
+ };
+
+ // Write barriers may be required when VM_ENV_FLAG_WB_REQUIRED is set, however write barriers
+ // only affect heap objects being written. If we know an immediate value is being written we
+ // can skip this check.
+ if !value_type.is_imm() {
+ // flags & VM_ENV_FLAG_WB_REQUIRED
asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into());
// if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
@@ -1933,12 +2592,12 @@ fn gen_setlocal_generic(
JCC_JNZ,
jit,
asm,
- ocb,
1,
Counter::setlocal_wb_required,
);
}
+ // Set local type in the context
if level == 0 {
let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset).as_usize();
asm.ctx.set_local_type(local_idx, value_type);
@@ -1948,8 +2607,7 @@ fn gen_setlocal_generic(
let stack_top = asm.stack_pop(1);
// Write the value at the environment pointer
- let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32);
- asm.mov(Opnd::mem(64, ep_opnd, offs), stack_top);
+ asm.mov(local_opnd, stack_top);
Some(KeepCompiling)
}
@@ -1957,41 +2615,37 @@ fn gen_setlocal_generic(
fn gen_setlocal(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let idx = jit.get_arg(0).as_u32();
let level = jit.get_arg(1).as_u32();
- gen_setlocal_generic(jit, asm, ocb, idx, level)
+ gen_setlocal_generic(jit, asm, idx, level)
}
fn gen_setlocal_wc0(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let idx = jit.get_arg(0).as_u32();
- gen_setlocal_generic(jit, asm, ocb, idx, 0)
+ gen_setlocal_generic(jit, asm, idx, 0)
}
fn gen_setlocal_wc1(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let idx = jit.get_arg(0).as_u32();
- gen_setlocal_generic(jit, asm, ocb, idx, 1)
+ gen_setlocal_generic(jit, asm, idx, 1)
}
// new hash initialized from top N values
fn gen_newhash(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let num: u64 = jit.get_arg(0).as_u64();
// Save the PC and SP because we are allocating
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_call_with_gc(jit, asm);
if num != 0 {
// val = rb_hash_new_with_size(num / 2);
@@ -2021,12 +2675,12 @@ fn gen_newhash(
asm.cpop_into(new_hash); // x86 alignment
asm.stack_pop(num.try_into().unwrap());
- let stack_ret = asm.stack_push(Type::Hash);
+ let stack_ret = asm.stack_push(Type::CHash);
asm.mov(stack_ret, new_hash);
} else {
// val = rb_hash_new();
let new_hash = asm.ccall(rb_hash_new as *const u8, vec![]);
- let stack_ret = asm.stack_push(Type::Hash);
+ let stack_ret = asm.stack_push(Type::CHash);
asm.mov(stack_ret, new_hash);
}
@@ -2036,19 +2690,38 @@ fn gen_newhash(
fn gen_putstring(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let put_val = jit.get_arg(0);
// Save the PC and SP because the callee will allocate
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_call_with_gc(jit, asm);
let str_opnd = asm.ccall(
rb_ec_str_resurrect as *const u8,
- vec![EC, put_val.into()]
+ vec![EC, put_val.into(), 0.into()]
);
- let stack_top = asm.stack_push(Type::TString);
+ let stack_top = asm.stack_push(Type::CString);
+ asm.mov(stack_top, str_opnd);
+
+ Some(KeepCompiling)
+}
+
+fn gen_putchilledstring(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+) -> Option<CodegenStatus> {
+ let put_val = jit.get_arg(0);
+
+ // Save the PC and SP because the callee will allocate
+ jit_prepare_call_with_gc(jit, asm);
+
+ let str_opnd = asm.ccall(
+ rb_ec_str_resurrect as *const u8,
+ vec![EC, put_val.into(), 1.into()]
+ );
+
+ let stack_top = asm.stack_push(Type::CString);
asm.mov(stack_top, str_opnd);
Some(KeepCompiling)
@@ -2057,14 +2730,13 @@ fn gen_putstring(
fn gen_checkmatch(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let flag = jit.get_arg(0).as_u32();
// rb_vm_check_match is not leaf unless flag is VM_CHECKMATCH_TYPE_WHEN.
// See also: leafness_of_checkmatch() and check_match()
if flag != VM_CHECKMATCH_TYPE_WHEN {
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
}
let pattern = asm.stack_opnd(0);
@@ -2087,11 +2759,10 @@ fn gen_checkmatch(
fn gen_checkkeyword(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// When a keyword is unspecified past index 32, a hash will be used
// instead. This can only happen in iseqs taking more than 32 keywords.
- if unsafe { (*get_iseq_body_param_keyword(jit.iseq)).num >= 32 } {
+ if unsafe { (*get_iseq_body_param_keyword(jit.iseq)).num >= VM_KW_SPECIFIED_BITS_MAX.try_into().unwrap() } {
return None;
}
@@ -2101,11 +2772,11 @@ fn gen_checkkeyword(
// The index of the keyword we want to check
let index: i64 = jit.get_arg(1).as_i64();
- // Load environment pointer EP
- let ep_opnd = gen_get_ep(asm, 0);
-
- // VALUE kw_bits = *(ep - bits);
- let bits_opnd = Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * -bits_offset);
+ // `unspecified_bits` is part of the local table, so a register may have been allocated
+ // for that "local" when it was passed as an argument. We must read it from that register,
+ // if allocated, to avoid loading stale bits from the stack slot. We assume that EP has not
+ // escaped as of entering a method with keyword arguments.
+ let bits_opnd = asm.local_opnd(bits_offset as u32);
// unsigned int b = (unsigned int)FIX2ULONG(kw_bits);
// if ((b & (0x01 << idx))) {
@@ -2127,8 +2798,7 @@ fn jit_chain_guard(
jcc: JCCKinds,
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
- depth_limit: i32,
+ depth_limit: u8,
counter: Counter,
) {
let target0_gen_fn = match jcc {
@@ -2139,7 +2809,7 @@ fn jit_chain_guard(
JCC_JO_MUL => BranchGenFn::JOMulToTarget0,
};
- if (asm.ctx.get_chain_depth() as i32) < depth_limit {
+ if asm.ctx.get_chain_depth() < depth_limit {
// Rewind Context to use the stack_size at the beginning of this instruction.
let mut deeper = asm.ctx.with_stack_size(jit.stack_size_for_pc);
deeper.increment_chain_depth();
@@ -2148,75 +2818,32 @@ fn jit_chain_guard(
idx: jit.insn_idx,
};
- gen_branch(jit, asm, ocb, bid, &deeper, None, None, target0_gen_fn);
+ jit.gen_branch(asm, bid, &deeper, None, None, target0_gen_fn);
} else {
target0_gen_fn.call(asm, Target::side_exit(counter), None);
}
}
// up to 8 different shapes for each
-pub const GET_IVAR_MAX_DEPTH: i32 = 8;
+pub const GET_IVAR_MAX_DEPTH: u8 = 8;
// up to 8 different shapes for each
-pub const SET_IVAR_MAX_DEPTH: i32 = 8;
+pub const SET_IVAR_MAX_DEPTH: u8 = 8;
// hashes and arrays
-pub const OPT_AREF_MAX_CHAIN_DEPTH: i32 = 2;
+pub const OPT_AREF_MAX_CHAIN_DEPTH: u8 = 2;
// expandarray
-pub const EXPANDARRAY_MAX_CHAIN_DEPTH: i32 = 4;
+pub const EXPANDARRAY_MAX_CHAIN_DEPTH: u8 = 4;
// up to 5 different methods for send
-pub const SEND_MAX_DEPTH: i32 = 5;
+pub const SEND_MAX_DEPTH: u8 = 5;
// up to 20 different offsets for case-when
-pub const CASE_WHEN_MAX_DEPTH: i32 = 20;
+pub const CASE_WHEN_MAX_DEPTH: u8 = 20;
pub const MAX_SPLAT_LENGTH: i32 = 127;
-// Codegen for setting an instance variable.
-// Preconditions:
-// - receiver is in REG0
-// - receiver has the same class as CLASS_OF(comptime_receiver)
-// - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled
-fn gen_set_ivar(
- jit: &mut JITState,
- asm: &mut Assembler,
- ivar_name: ID,
- flags: u32,
- argc: i32,
-) -> Option<CodegenStatus> {
-
- // This is a .send call and we need to adjust the stack
- if flags & VM_CALL_OPT_SEND != 0 {
- handle_opt_send_shift_stack(asm, argc);
- }
-
- // Save the PC and SP because the callee may allocate
- // Note that this modifies REG_SP, which is why we do it first
- jit_prepare_routine_call(jit, asm);
-
- // Get the operands from the stack
- let val_opnd = asm.stack_opnd(0);
- let recv_opnd = asm.stack_opnd(1);
-
- // Call rb_vm_set_ivar_id with the receiver, the ivar name, and the value
- let val = asm.ccall(
- rb_vm_set_ivar_id as *const u8,
- vec![
- recv_opnd,
- Opnd::UImm(ivar_name),
- val_opnd,
- ],
- );
- asm.stack_pop(2); // Keep them on stack during ccall for GC
-
- let out_opnd = asm.stack_push(Type::Unknown);
- asm.mov(out_opnd, val);
-
- Some(KeepCompiling)
-}
-
// Codegen for getting an instance variable.
// Preconditions:
// - receiver has the same class as CLASS_OF(comptime_receiver)
@@ -2224,51 +2851,35 @@ fn gen_set_ivar(
fn gen_get_ivar(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
- max_chain_depth: i32,
+ max_chain_depth: u8,
comptime_receiver: VALUE,
ivar_name: ID,
recv: Opnd,
recv_opnd: YARVOpnd,
) -> Option<CodegenStatus> {
- let comptime_val_klass = comptime_receiver.class_of();
-
// If recv isn't already a register, load it.
let recv = match recv {
Opnd::InsnOut { .. } => recv,
_ => asm.load(recv),
};
- // Check if the comptime class uses a custom allocator
- let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) };
- let uses_custom_allocator = match custom_allocator {
- Some(alloc_fun) => {
- let allocate_instance = rb_class_allocate_instance as *const u8;
- alloc_fun as *const u8 != allocate_instance
- }
- None => false,
- };
-
// Check if the comptime receiver is a T_OBJECT
let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) };
// Use a general C call at the last chain to avoid exits on megamorphic shapes
- let megamorphic = asm.ctx.get_chain_depth() as i32 >= max_chain_depth;
+ let megamorphic = asm.ctx.get_chain_depth() >= max_chain_depth;
if megamorphic {
- gen_counter_incr(asm, Counter::num_getivar_megamorphic);
+ gen_counter_incr(jit, asm, Counter::num_getivar_megamorphic);
}
- // If the class uses the default allocator, instances should all be T_OBJECT
- // NOTE: This assumes nobody changes the allocator of the class after allocation.
- // Eventually, we can encode whether an object is T_OBJECT or not
- // inside object shapes.
+ // NOTE: This assumes T_OBJECT can't ever have the same shape_id as any other type.
// too-complex shapes can't use index access, so we use rb_ivar_get for them too.
- if !receiver_t_object || uses_custom_allocator || comptime_receiver.shape_too_complex() || megamorphic {
+ if !comptime_receiver.heap_object_p() || comptime_receiver.shape_too_complex() || megamorphic {
// General case. Call rb_ivar_get().
// VALUE rb_ivar_get(VALUE obj, ID id)
asm_comment!(asm, "call rb_ivar_get()");
- // The function could raise exceptions.
- jit_prepare_routine_call(jit, asm);
+ // The function could raise RactorIsolationError.
+ jit_prepare_non_leaf_call(jit, asm);
let ivar_val = asm.ccall(rb_ivar_get as *const u8, vec![recv, Opnd::UImm(ivar_name)]);
@@ -2281,15 +2892,14 @@ fn gen_get_ivar(
asm.mov(out_opnd, ivar_val);
// Jump to next instruction. This allows guard chains to share the same successor.
- jump_to_next_insn(jit, asm, ocb);
+ jump_to_next_insn(jit, asm);
return Some(EndBlock);
}
let ivar_index = unsafe {
let shape_id = comptime_receiver.shape_id_of();
- let shape = rb_shape_get_shape_by_id(shape_id);
- let mut ivar_index: u32 = 0;
- if rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) {
+ let mut ivar_index: u16 = 0;
+ if rb_shape_get_iv_index(shape_id, ivar_name, &mut ivar_index) {
Some(ivar_index as usize)
} else {
None
@@ -2299,10 +2909,7 @@ fn gen_get_ivar(
// Guard heap object (recv_opnd must be used before stack_pop)
guard_object_is_heap(asm, recv, recv_opnd, Counter::getivar_not_heap);
- // Compile time self is embedded and the ivar index lands within the object
- let embed_test_result = unsafe { FL_TEST_RAW(comptime_receiver, VALUE(ROBJECT_EMBED.as_usize())) != VALUE(0) };
-
- let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) };
+ let expected_shape = unsafe { rb_obj_shape_id(comptime_receiver) };
let shape_id_offset = unsafe { rb_shape_id_offset() };
let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset);
@@ -2312,7 +2919,6 @@ fn gen_get_ivar(
JCC_JNE,
jit,
asm,
- ocb,
max_chain_depth,
Counter::getivar_megamorphic,
);
@@ -2331,45 +2937,52 @@ fn gen_get_ivar(
asm.mov(out_opnd, Qnil.into());
}
Some(ivar_index) => {
- if embed_test_result {
- // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
-
- // Load the variable
- let offs = ROBJECT_OFFSET_AS_ARY as i32 + (ivar_index * SIZEOF_VALUE) as i32;
- let ivar_opnd = Opnd::mem(64, recv, offs);
-
- // Push the ivar on the stack
- let out_opnd = asm.stack_push(Type::Unknown);
- asm.mov(out_opnd, ivar_opnd);
+ let ivar_opnd = if receiver_t_object {
+ if comptime_receiver.embedded_p() {
+ // See ROBJECT_FIELDS() from include/ruby/internal/core/robject.h
+
+ // Load the variable
+ let offs = ROBJECT_OFFSET_AS_ARY as i32 + (ivar_index * SIZEOF_VALUE) as i32;
+ Opnd::mem(64, recv, offs)
+ } else {
+ // Compile time value is *not* embedded.
+
+ // Get a pointer to the extended table
+ let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_FIELDS as i32));
+
+ // Read the ivar from the extended table
+ Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32)
+ }
} else {
- // Compile time value is *not* embedded.
-
- // Get a pointer to the extended table
- let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR as i32));
+ asm_comment!(asm, "call rb_ivar_get_at()");
- // Read the ivar from the extended table
- let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32);
+ if assume_single_ractor_mode(jit, asm) {
+ asm.ccall(rb_ivar_get_at_no_ractor_check as *const u8, vec![recv, Opnd::UImm((ivar_index as u32).into())])
+ } else {
+ // The function could raise RactorIsolationError.
+ jit_prepare_non_leaf_call(jit, asm);
+ asm.ccall(rb_ivar_get_at as *const u8, vec![recv, Opnd::UImm((ivar_index as u32).into()), Opnd::UImm(ivar_name)])
+ }
+ };
- let out_opnd = asm.stack_push(Type::Unknown);
- asm.mov(out_opnd, ivar_opnd);
- }
+ // Push the ivar on the stack
+ let out_opnd = asm.stack_push(Type::Unknown);
+ asm.mov(out_opnd, ivar_opnd);
}
}
// Jump to next instruction. This allows guard chains to share the same successor.
- jump_to_next_insn(jit, asm, ocb);
+ jump_to_next_insn(jit, asm);
Some(EndBlock)
}
fn gen_getinstancevariable(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime `self`
- if !jit.at_current_insn() {
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ if !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
}
let ivar_name = jit.get_arg(0).as_u64();
@@ -2382,7 +2995,6 @@ fn gen_getinstancevariable(
gen_get_ivar(
jit,
asm,
- ocb,
GET_IVAR_MAX_DEPTH,
comptime_val,
ivar_name,
@@ -2417,7 +3029,7 @@ fn gen_write_iv(
// Compile time value is *not* embedded.
// Get a pointer to the extended table
- let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR as i32));
+ let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_FIELDS as i32));
// Write the ivar in to the extended table
let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32);
@@ -2430,52 +3042,60 @@ fn gen_write_iv(
fn gen_setinstancevariable(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime `self`
- if !jit.at_current_insn() {
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ if !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
}
let ivar_name = jit.get_arg(0).as_u64();
+ let ic = jit.get_arg(1).as_ptr();
let comptime_receiver = jit.peek_at_self();
- let comptime_val_klass = comptime_receiver.class_of();
+ gen_set_ivar(
+ jit,
+ asm,
+ comptime_receiver,
+ ivar_name,
+ SelfOpnd,
+ Some(ic),
+ )
+}
+/// Set an instance variable on setinstancevariable or attr_writer.
+/// It switches the behavior based on what recv_opnd is given.
+/// * SelfOpnd: setinstancevariable, which doesn't push a result onto the stack.
+/// * StackOpnd: attr_writer, which pushes a result onto the stack.
+fn gen_set_ivar(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ comptime_receiver: VALUE,
+ ivar_name: ID,
+ recv_opnd: YARVOpnd,
+ ic: Option<*const iseq_inline_iv_cache_entry>,
+) -> Option<CodegenStatus> {
// If the comptime receiver is frozen, writing an IV will raise an exception
// and we don't want to JIT code to deal with that situation.
if comptime_receiver.is_frozen() {
- gen_counter_incr(asm, Counter::setivar_frozen);
+ gen_counter_incr(jit, asm, Counter::setivar_frozen);
return None;
}
let stack_type = asm.ctx.get_opnd_type(StackOpnd(0));
- // Check if the comptime class uses a custom allocator
- let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) };
- let uses_custom_allocator = match custom_allocator {
- Some(alloc_fun) => {
- let allocate_instance = rb_class_allocate_instance as *const u8;
- alloc_fun as *const u8 != allocate_instance
- }
- None => false,
- };
-
// Check if the comptime receiver is a T_OBJECT
let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) };
// Use a general C call at the last chain to avoid exits on megamorphic shapes
- let megamorphic = asm.ctx.get_chain_depth() as i32 >= SET_IVAR_MAX_DEPTH;
+ let megamorphic = asm.ctx.get_chain_depth() >= SET_IVAR_MAX_DEPTH;
if megamorphic {
- gen_counter_incr(asm, Counter::num_setivar_megamorphic);
+ gen_counter_incr(jit, asm, Counter::num_setivar_megamorphic);
}
// Get the iv index
let shape_too_complex = comptime_receiver.shape_too_complex();
- let ivar_index = if !shape_too_complex {
+ let ivar_index = if !comptime_receiver.special_const_p() && !shape_too_complex {
let shape_id = comptime_receiver.shape_id_of();
- let shape = unsafe { rb_shape_get_shape_by_id(shape_id) };
- let mut ivar_index: u32 = 0;
- if unsafe { rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) } {
+ let mut ivar_index: u16 = 0;
+ if unsafe { rb_shape_get_iv_index(shape_id, ivar_name, &mut ivar_index) } {
Some(ivar_index as usize)
} else {
None
@@ -2485,27 +3105,31 @@ fn gen_setinstancevariable(
};
// The current shape doesn't contain this iv, we need to transition to another shape.
+ let mut new_shape_too_complex = false;
let new_shape = if !shape_too_complex && receiver_t_object && ivar_index.is_none() {
- let current_shape = comptime_receiver.shape_of();
- let next_shape = unsafe { rb_shape_get_next(current_shape, comptime_receiver, ivar_name) };
- let next_shape_id = unsafe { rb_shape_id(next_shape) };
+ let current_shape_id = comptime_receiver.shape_id_of();
+        // We don't need to check for imemo_fields here because we're definitely looking at a T_OBJECT.
+ let klass = unsafe { rb_obj_class(comptime_receiver) };
+ let next_shape_id = unsafe { rb_shape_transition_add_ivar_no_warnings(klass, current_shape_id, ivar_name) };
// If the VM ran out of shapes, or this class generated too many leaf shapes,
// it may be de-optimized into OBJ_TOO_COMPLEX_SHAPE (hash-table).
- if next_shape_id == OBJ_TOO_COMPLEX_SHAPE_ID {
+ new_shape_too_complex = unsafe { rb_jit_shape_too_complex_p(next_shape_id) };
+ if new_shape_too_complex {
Some((next_shape_id, None, 0_usize))
} else {
- let current_capacity = unsafe { (*current_shape).capacity };
+ let current_capacity = unsafe { rb_yjit_shape_capacity(current_shape_id) };
+ let next_capacity = unsafe { rb_yjit_shape_capacity(next_shape_id) };
// If the new shape has a different capacity, or is TOO_COMPLEX, we'll have to
// reallocate it.
- let needs_extension = unsafe { (*current_shape).capacity != (*next_shape).capacity };
+ let needs_extension = next_capacity != current_capacity;
// We can write to the object, but we need to transition the shape
- let ivar_index = unsafe { (*current_shape).next_iv_index } as usize;
+ let ivar_index = unsafe { rb_yjit_shape_index(next_shape_id) } as usize;
let needs_extension = if needs_extension {
- Some((current_capacity, unsafe { (*next_shape).capacity }))
+ Some((current_capacity, next_capacity))
} else {
None
};
@@ -2514,45 +3138,53 @@ fn gen_setinstancevariable(
} else {
None
};
- let new_shape_too_complex = matches!(new_shape, Some((OBJ_TOO_COMPLEX_SHAPE_ID, _, _)));
- // If the receiver isn't a T_OBJECT, or uses a custom allocator,
- // then just write out the IV write as a function call.
+ // If the receiver isn't a T_OBJECT, then just write out the IV write as a function call.
// too-complex shapes can't use index access, so we use rb_ivar_get for them too.
- if !receiver_t_object || uses_custom_allocator || shape_too_complex || new_shape_too_complex || megamorphic {
- asm_comment!(asm, "call rb_vm_setinstancevariable()");
-
- let ic = jit.get_arg(1).as_u64(); // type IVC
-
- // The function could raise exceptions.
+ if !receiver_t_object || shape_too_complex || new_shape_too_complex || megamorphic {
+ // The function could raise FrozenError.
// Note that this modifies REG_SP, which is why we do it first
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
// Get the operands from the stack
let val_opnd = asm.stack_opnd(0);
- // Call rb_vm_setinstancevariable(iseq, obj, id, val, ic);
- asm.ccall(
- rb_vm_setinstancevariable as *const u8,
- vec![
- Opnd::const_ptr(jit.iseq as *const u8),
- Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF),
- ivar_name.into(),
- val_opnd,
- Opnd::const_ptr(ic as *const u8),
- ]
- );
- asm.stack_pop(1); // Keep it on stack during ccall for GC
+ if let StackOpnd(index) = recv_opnd { // attr_writer
+ let recv = asm.stack_opnd(index as i32);
+ asm_comment!(asm, "call rb_vm_set_ivar_id()");
+ asm.ccall(
+ rb_vm_set_ivar_id as *const u8,
+ vec![
+ recv,
+ Opnd::UImm(ivar_name),
+ val_opnd,
+ ],
+ );
+ } else { // setinstancevariable
+ asm_comment!(asm, "call rb_vm_setinstancevariable()");
+ asm.ccall(
+ rb_vm_setinstancevariable as *const u8,
+ vec![
+ VALUE(jit.iseq as usize).into(),
+ Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF),
+ ivar_name.into(),
+ val_opnd,
+ Opnd::const_ptr(ic.unwrap() as *const u8),
+ ],
+ );
+ }
} else {
// Get the receiver
- let mut recv = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF));
-
- let recv_opnd = SelfOpnd;
+ let mut recv = asm.load(if let StackOpnd(index) = recv_opnd {
+ asm.stack_opnd(index as i32)
+ } else {
+ Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)
+ });
// Upgrade type
guard_object_is_heap(asm, recv, recv_opnd, Counter::setivar_not_heap);
- let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) };
+ let expected_shape = unsafe { rb_obj_shape_id(comptime_receiver) };
let shape_id_offset = unsafe { rb_shape_id_offset() };
let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset);
@@ -2562,12 +3194,10 @@ fn gen_setinstancevariable(
JCC_JNE,
jit,
asm,
- ocb,
SET_IVAR_MAX_DEPTH,
Counter::setivar_megamorphic,
);
- asm.spill_temps(); // for ccall (must be done before write_val is popped)
let write_val;
match ivar_index {
@@ -2582,7 +3212,7 @@ fn gen_setinstancevariable(
// It allocates so can trigger GC, which takes the VM lock
// so could yield to a different ractor.
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_call_with_gc(jit, asm);
asm.ccall(rb_ensure_iv_list_size as *const u8,
vec![
recv,
@@ -2592,10 +3222,14 @@ fn gen_setinstancevariable(
);
// Load the receiver again after the function call
- recv = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF))
+ recv = asm.load(if let StackOpnd(index) = recv_opnd {
+ asm.stack_opnd(index as i32)
+ } else {
+ Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)
+ });
}
- write_val = asm.stack_pop(1);
+ write_val = asm.stack_opnd(0);
gen_write_iv(asm, comptime_receiver, recv, ivar_index, write_val, needs_extension.is_some());
asm_comment!(asm, "write shape");
@@ -2613,7 +3247,7 @@ fn gen_setinstancevariable(
// the iv index by searching up the shape tree. If we've
// made the transition already, then there's no reason to
// update the shape on the object. Just set the IV.
- write_val = asm.stack_pop(1);
+ write_val = asm.stack_opnd(0);
gen_write_iv(asm, comptime_receiver, recv, ivar_index, write_val, false);
},
}
@@ -2621,6 +3255,7 @@ fn gen_setinstancevariable(
// If we know the stack value is an immediate, there's no need to
// generate WB code.
if !stack_type.is_imm() {
+ asm.spill_regs(); // for ccall (unconditionally spill them for RegMappings consistency)
let skip_wb = asm.new_label("skip_wb");
// If the value we're writing is an immediate, we don't need to WB
asm.test(write_val, (RUBY_IMMEDIATE_MASK as u64).into());
@@ -2642,6 +3277,16 @@ fn gen_setinstancevariable(
asm.write_label(skip_wb);
}
}
+ let write_val = asm.stack_pop(1); // Keep write_val on stack during ccall for GC
+
+ // If it's attr_writer, i.e. recv_opnd is StackOpnd, we need to pop
+ // the receiver and push the written value onto the stack.
+ if let StackOpnd(_) = recv_opnd {
+ asm.stack_pop(1); // Pop receiver
+
+ let out_opnd = asm.stack_push(Type::Unknown); // Push a return value
+ asm.mov(out_opnd, write_val);
+ }
Some(KeepCompiling)
}
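// ---------------------------------------------------------------------------
// Editorial sketch (not part of this diff): why the write barrier above can be
// skipped when the written value is an immediate. Only heap objects need to be
// recorded for the GC, so a value carrying immediate tag bits (or the nil/false
// encodings) never needs a barrier. The mask below is an assumption for a
// 64-bit flonum build, and the special-constant values are passed in as
// parameters rather than taken from CRuby.
const IMMEDIATE_MASK: u64 = 0x7; // hypothetical stand-in for RUBY_IMMEDIATE_MASK

fn needs_write_barrier(written_value: u64, qnil: u64, qfalse: u64) -> bool {
    // Heap object: no immediate tag bits set and not nil/false.
    written_value & IMMEDIATE_MASK == 0 && written_value != qnil && written_value != qfalse
}
// ---------------------------------------------------------------------------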
@@ -2649,37 +3294,46 @@ fn gen_setinstancevariable(
fn gen_defined(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let op_type = jit.get_arg(0).as_u64();
let obj = jit.get_arg(1);
let pushval = jit.get_arg(2);
- // Save the PC and SP because the callee may allocate
- // Note that this modifies REG_SP, which is why we do it first
- jit_prepare_routine_call(jit, asm);
+ match op_type as u32 {
+ DEFINED_YIELD => {
+ asm.stack_pop(1); // v operand is not used
+ let out_opnd = asm.stack_push(Type::Unknown); // nil or "yield"
- // Get the operands from the stack
- let v_opnd = asm.stack_opnd(0);
+ gen_block_given(jit, asm, out_opnd, pushval.into(), Qnil.into());
+ }
+ _ => {
+ // Save the PC and SP because the callee may allocate or call #respond_to?
+ // Note that this modifies REG_SP, which is why we do it first
+ jit_prepare_non_leaf_call(jit, asm);
- // Call vm_defined(ec, reg_cfp, op_type, obj, v)
- let def_result = asm.ccall(rb_vm_defined as *const u8, vec![EC, CFP, op_type.into(), obj.into(), v_opnd]);
- asm.stack_pop(1); // Keep it on stack during ccall for GC
+ // Get the operands from the stack
+ let v_opnd = asm.stack_opnd(0);
- // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) {
- // val = pushval;
- // }
- asm.test(def_result, Opnd::UImm(255));
- let out_value = asm.csel_nz(pushval.into(), Qnil.into());
+ // Call vm_defined(ec, reg_cfp, op_type, obj, v)
+ let def_result = asm.ccall(rb_vm_defined as *const u8, vec![EC, CFP, op_type.into(), obj.into(), v_opnd]);
+ asm.stack_pop(1); // Keep it on stack during ccall for GC
- // Push the return value onto the stack
- let out_type = if pushval.special_const_p() {
- Type::UnknownImm
- } else {
- Type::Unknown
- };
- let stack_ret = asm.stack_push(out_type);
- asm.mov(stack_ret, out_value);
+ // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) {
+ // val = pushval;
+ // }
+ asm.test(def_result, Opnd::UImm(255));
+ let out_value = asm.csel_nz(pushval.into(), Qnil.into());
+
+ // Push the return value onto the stack
+ let out_type = if pushval.special_const_p() {
+ Type::UnknownImm
+ } else {
+ Type::Unknown
+ };
+ let stack_ret = asm.stack_push(out_type);
+ asm.mov(stack_ret, out_value);
+ }
+ }
Some(KeepCompiling)
}
@@ -2687,12 +3341,10 @@ fn gen_defined(
fn gen_definedivar(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize based on a runtime receiver
- if !jit.at_current_insn() {
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ if !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
}
let ivar_name = jit.get_arg(0).as_u64();
@@ -2706,12 +3358,12 @@ fn gen_definedivar(
// Specialize based on compile time values
let comptime_receiver = jit.peek_at_self();
- if comptime_receiver.shape_too_complex() || asm.ctx.get_chain_depth() as i32 >= GET_IVAR_MAX_DEPTH {
+ if comptime_receiver.special_const_p() || comptime_receiver.shape_too_complex() || asm.ctx.get_chain_depth() >= GET_IVAR_MAX_DEPTH {
// Fall back to calling rb_ivar_defined
// Save the PC and SP because the callee may allocate
// Note that this modifies REG_SP, which is why we do it first
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_call_with_gc(jit, asm);
// Call rb_ivar_defined(recv, ivar_name)
let def_result = asm.ccall(rb_ivar_defined as *const u8, vec![recv, ivar_name.into()]);
@@ -2732,9 +3384,8 @@ fn gen_definedivar(
let shape_id = comptime_receiver.shape_id_of();
let ivar_exists = unsafe {
- let shape = rb_shape_get_shape_by_id(shape_id);
- let mut ivar_index: u32 = 0;
- rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index)
+ let mut ivar_index: u16 = 0;
+ rb_shape_get_iv_index(shape_id, ivar_name, &mut ivar_index)
};
// Guard heap object (recv_opnd must be used before stack_pop)
@@ -2749,7 +3400,6 @@ fn gen_definedivar(
JCC_JNE,
jit,
asm,
- ocb,
GET_IVAR_MAX_DEPTH,
Counter::definedivar_megamorphic,
);
@@ -2758,15 +3408,12 @@ fn gen_definedivar(
jit_putobject(asm, result);
// Jump to next instruction. This allows guard chains to share the same successor.
- jump_to_next_insn(jit, asm, ocb);
-
- return Some(EndBlock);
+ return jump_to_next_insn(jit, asm);
}
fn gen_checktype(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let type_val = jit.get_arg(0).as_u32();
@@ -2821,14 +3468,13 @@ fn gen_checktype(
fn gen_concatstrings(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let n = jit.get_arg(0).as_usize();
- // Save the PC and SP because we are allocating
- jit_prepare_routine_call(jit, asm);
+ // rb_str_concat_literals may raise Encoding::CompatibilityError
+ jit_prepare_non_leaf_call(jit, asm);
- let values_ptr = asm.lea(asm.ctx.sp_opnd(-((SIZEOF_VALUE as isize) * n as isize)));
+ let values_ptr = asm.lea(asm.ctx.sp_opnd(-(n as i32)));
// call rb_str_concat_literals(size_t n, const VALUE *strings);
let return_value = asm.ccall(
@@ -2846,7 +3492,6 @@ fn gen_concatstrings(
fn guard_two_fixnums(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) {
let counter = Counter::guard_send_not_fixnums;
@@ -2890,7 +3535,6 @@ fn guard_two_fixnums(
JCC_JZ,
jit,
asm,
- ocb,
SEND_MAX_DEPTH,
counter,
);
@@ -2903,7 +3547,6 @@ fn guard_two_fixnums(
JCC_JZ,
jit,
asm,
- ocb,
SEND_MAX_DEPTH,
counter,
);
@@ -2920,7 +3563,6 @@ type CmovFn = fn(cb: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> Opnd;
fn gen_fixnum_cmp(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
cmov_op: CmovFn,
bop: ruby_basic_operators,
) -> Option<CodegenStatus> {
@@ -2928,18 +3570,17 @@ fn gen_fixnum_cmp(
Some(two_fixnums) => two_fixnums,
None => {
// Defer compilation so we can specialize based on a runtime receiver
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ return jit.defer_compilation(asm);
}
};
if two_fixnums {
- if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, bop) {
+ if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, bop) {
return None;
}
// Check that both operands are fixnums
- guard_two_fixnums(jit, asm, ocb);
+ guard_two_fixnums(jit, asm);
// Get the operands from the stack
let arg1 = asm.stack_pop(1);
@@ -2955,40 +3596,36 @@ fn gen_fixnum_cmp(
Some(KeepCompiling)
} else {
- gen_opt_send_without_block(jit, asm, ocb)
+ gen_opt_send_without_block(jit, asm)
}
}
fn gen_opt_lt(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
- gen_fixnum_cmp(jit, asm, ocb, Assembler::csel_l, BOP_LT)
+ gen_fixnum_cmp(jit, asm, Assembler::csel_l, BOP_LT)
}
fn gen_opt_le(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
- gen_fixnum_cmp(jit, asm, ocb, Assembler::csel_le, BOP_LE)
+ gen_fixnum_cmp(jit, asm, Assembler::csel_le, BOP_LE)
}
fn gen_opt_ge(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
- gen_fixnum_cmp(jit, asm, ocb, Assembler::csel_ge, BOP_GE)
+ gen_fixnum_cmp(jit, asm, Assembler::csel_ge, BOP_GE)
}
fn gen_opt_gt(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
- gen_fixnum_cmp(jit, asm, ocb, Assembler::csel_g, BOP_GT)
+ gen_fixnum_cmp(jit, asm, Assembler::csel_g, BOP_GT)
}
// Implements specialized equality for either two fixnum or two strings
@@ -2997,7 +3634,6 @@ fn gen_opt_gt(
fn gen_equality_specialized(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
gen_eq: bool,
) -> Option<bool> {
let a_opnd = asm.stack_opnd(1);
@@ -3009,12 +3645,12 @@ fn gen_equality_specialized(
};
if two_fixnums {
- if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_EQ) {
+ if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, BOP_EQ) {
// if overridden, emit the generic version
return Some(false);
}
- guard_two_fixnums(jit, asm, ocb);
+ guard_two_fixnums(jit, asm);
asm.cmp(a_opnd, b_opnd);
let val = if gen_eq {
@@ -3031,14 +3667,14 @@ fn gen_equality_specialized(
return Some(true);
}
- if !jit.at_current_insn() {
+ if !jit.at_compile_target() {
return None;
}
let comptime_a = jit.peek_at_stack(&asm.ctx, 1);
let comptime_b = jit.peek_at_stack(&asm.ctx, 0);
if unsafe { comptime_a.class_of() == rb_cString && comptime_b.class_of() == rb_cString } {
- if !assume_bop_not_redefined(jit, asm, ocb, STRING_REDEFINED_OP_FLAG, BOP_EQ) {
+ if !assume_bop_not_redefined(jit, asm, STRING_REDEFINED_OP_FLAG, BOP_EQ) {
// if overridden, emit the generic version
return Some(false);
}
@@ -3047,8 +3683,6 @@ fn gen_equality_specialized(
jit_guard_known_klass(
jit,
asm,
- ocb,
- unsafe { rb_cString },
a_opnd,
a_opnd.into(),
comptime_a,
@@ -3060,7 +3694,7 @@ fn gen_equality_specialized(
let ret = asm.new_label("ret");
// Spill for ccall. For safety, unconditionally spill temps before branching.
- asm.spill_temps();
+ asm.spill_regs();
// If they are equal by identity, return true
asm.cmp(a_opnd, b_opnd);
@@ -3074,8 +3708,6 @@ fn gen_equality_specialized(
jit_guard_known_klass(
jit,
asm,
- ocb,
- unsafe { rb_cString },
b_opnd,
b_opnd.into(),
comptime_b,
@@ -3110,54 +3742,48 @@ fn gen_equality_specialized(
fn gen_opt_eq(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
- let specialized = match gen_equality_specialized(jit, asm, ocb, true) {
+ let specialized = match gen_equality_specialized(jit, asm, true) {
Some(specialized) => specialized,
None => {
// Defer compilation so we can specialize based on a runtime receiver
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ return jit.defer_compilation(asm);
}
};
if specialized {
- jump_to_next_insn(jit, asm, ocb);
- Some(EndBlock)
+ jump_to_next_insn(jit, asm)
} else {
- gen_opt_send_without_block(jit, asm, ocb)
+ gen_opt_send_without_block(jit, asm)
}
}
fn gen_opt_neq(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// opt_neq is passed two rb_call_data as arguments:
// first for ==, second for !=
let cd = jit.get_arg(1).as_ptr();
- return gen_send_general(jit, asm, ocb, cd, None);
+ perf_call! { gen_send_general(jit, asm, cd, None) }
}
fn gen_opt_aref(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let cd: *const rb_call_data = jit.get_arg(0).as_ptr();
let argc = unsafe { vm_ci_argc((*cd).ci) };
// Only JIT one arg calls like `ary[6]`
if argc != 1 {
- gen_counter_incr(asm, Counter::opt_aref_argc_not_one);
+ gen_counter_incr(jit, asm, Counter::opt_aref_argc_not_one);
return None;
}
// Defer compilation so we can specialize based on a runtime receiver
- if !jit.at_current_insn() {
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ if !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
}
// Specialize based on compile time values
@@ -3165,7 +3791,7 @@ fn gen_opt_aref(
let comptime_recv = jit.peek_at_stack(&asm.ctx, 1);
if comptime_recv.class_of() == unsafe { rb_cArray } && comptime_idx.fixnum_p() {
- if !assume_bop_not_redefined(jit, asm, ocb, ARRAY_REDEFINED_OP_FLAG, BOP_AREF) {
+ if !assume_bop_not_redefined(jit, asm, ARRAY_REDEFINED_OP_FLAG, BOP_AREF) {
return None;
}
@@ -3178,8 +3804,6 @@ fn gen_opt_aref(
jit_guard_known_klass(
jit,
asm,
- ocb,
- unsafe { rb_cArray },
recv_opnd,
recv_opnd.into(),
comptime_recv,
@@ -3207,10 +3831,9 @@ fn gen_opt_aref(
}
// Jump to next instruction. This allows guard chains to share the same successor.
- jump_to_next_insn(jit, asm, ocb);
- return Some(EndBlock);
+ return jump_to_next_insn(jit, asm);
} else if comptime_recv.class_of() == unsafe { rb_cHash } {
- if !assume_bop_not_redefined(jit, asm, ocb, HASH_REDEFINED_OP_FLAG, BOP_AREF) {
+ if !assume_bop_not_redefined(jit, asm, HASH_REDEFINED_OP_FLAG, BOP_AREF) {
return None;
}
@@ -3220,8 +3843,6 @@ fn gen_opt_aref(
jit_guard_known_klass(
jit,
asm,
- ocb,
- unsafe { rb_cHash },
recv_opnd,
recv_opnd.into(),
comptime_recv,
@@ -3230,7 +3851,7 @@ fn gen_opt_aref(
);
// Prepare to call rb_hash_aref(). It might call #hash on the key.
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
// Call rb_hash_aref
let key_opnd = asm.stack_opnd(0);
@@ -3245,23 +3866,20 @@ fn gen_opt_aref(
asm.mov(stack_ret, val);
// Jump to next instruction. This allows guard chains to share the same successor.
- jump_to_next_insn(jit, asm, ocb);
- Some(EndBlock)
+ jump_to_next_insn(jit, asm)
} else {
// General case. Call the [] method.
- gen_opt_send_without_block(jit, asm, ocb)
+ gen_opt_send_without_block(jit, asm)
}
}
fn gen_opt_aset(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime `self`
- if !jit.at_current_insn() {
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ if !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
}
let comptime_recv = jit.peek_at_stack(&asm.ctx, 2);
@@ -3277,8 +3895,6 @@ fn gen_opt_aset(
jit_guard_known_klass(
jit,
asm,
- ocb,
- unsafe { rb_cArray },
recv,
recv.into(),
comptime_recv,
@@ -3290,8 +3906,6 @@ fn gen_opt_aset(
jit_guard_known_klass(
jit,
asm,
- ocb,
- unsafe { rb_cInteger },
key,
key.into(),
comptime_key,
@@ -3300,7 +3914,7 @@ fn gen_opt_aset(
);
// We might allocate or raise
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
// Call rb_ary_store
let recv = asm.stack_opnd(2);
@@ -3318,15 +3932,12 @@ fn gen_opt_aset(
let stack_ret = asm.stack_push(Type::Unknown);
asm.mov(stack_ret, val);
- jump_to_next_insn(jit, asm, ocb);
- return Some(EndBlock);
+ return jump_to_next_insn(jit, asm)
} else if comptime_recv.class_of() == unsafe { rb_cHash } {
// Guard receiver is a Hash
jit_guard_known_klass(
jit,
asm,
- ocb,
- unsafe { rb_cHash },
recv,
recv.into(),
comptime_recv,
@@ -3335,7 +3946,7 @@ fn gen_opt_aset(
);
// We might allocate or raise
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
// Call rb_hash_aset
let recv = asm.stack_opnd(2);
@@ -3348,66 +3959,31 @@ fn gen_opt_aset(
let stack_ret = asm.stack_push(Type::Unknown);
asm.mov(stack_ret, ret);
- jump_to_next_insn(jit, asm, ocb);
- Some(EndBlock)
+ jump_to_next_insn(jit, asm)
} else {
- gen_opt_send_without_block(jit, asm, ocb)
+ gen_opt_send_without_block(jit, asm)
}
}
-fn gen_opt_aref_with(
- jit: &mut JITState,
- asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
-) -> Option<CodegenStatus>{
- jit_prepare_routine_call(jit, asm);
-
- let key_opnd = Opnd::Value(jit.get_arg(0));
- let recv_opnd = asm.stack_opnd(0);
-
- extern "C" {
- fn rb_vm_opt_aref_with(recv: VALUE, key: VALUE) -> VALUE;
- }
-
- let val_opnd = asm.ccall(
- rb_vm_opt_aref_with as *const u8,
- vec![
- recv_opnd,
- key_opnd
- ],
- );
- asm.stack_pop(1); // Keep it on stack during GC
-
- asm.cmp(val_opnd, Qundef.into());
- asm.je(Target::side_exit(Counter::opt_aref_with_qundef));
-
- let top = asm.stack_push(Type::Unknown);
- asm.mov(top, val_opnd);
-
- return Some(KeepCompiling);
-}
-
fn gen_opt_and(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) {
Some(two_fixnums) => two_fixnums,
None => {
// Defer compilation so we can specialize on a runtime `self`
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ return jit.defer_compilation(asm);
}
};
if two_fixnums {
- if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_AND) {
+ if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, BOP_AND) {
return None;
}
// Check that both operands are fixnums
- guard_two_fixnums(jit, asm, ocb);
+ guard_two_fixnums(jit, asm);
// Get the operands and destination from the stack
let arg1 = asm.stack_pop(1);
@@ -3418,36 +3994,34 @@ fn gen_opt_and(
// Push the output on the stack
let dst = asm.stack_push(Type::Fixnum);
- asm.store(dst, val);
+ asm.mov(dst, val);
Some(KeepCompiling)
} else {
// Delegate to send, call the method on the recv
- gen_opt_send_without_block(jit, asm, ocb)
+ gen_opt_send_without_block(jit, asm)
}
}
fn gen_opt_or(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) {
Some(two_fixnums) => two_fixnums,
None => {
// Defer compilation so we can specialize on a runtime `self`
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ return jit.defer_compilation(asm);
}
};
if two_fixnums {
- if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_OR) {
+ if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, BOP_OR) {
return None;
}
// Check that both operands are fixnums
- guard_two_fixnums(jit, asm, ocb);
+ guard_two_fixnums(jit, asm);
// Get the operands and destination from the stack
let arg1 = asm.stack_pop(1);
@@ -3458,36 +4032,34 @@ fn gen_opt_or(
// Push the output on the stack
let dst = asm.stack_push(Type::Fixnum);
- asm.store(dst, val);
+ asm.mov(dst, val);
Some(KeepCompiling)
} else {
// Delegate to send, call the method on the recv
- gen_opt_send_without_block(jit, asm, ocb)
+ gen_opt_send_without_block(jit, asm)
}
}
fn gen_opt_minus(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) {
Some(two_fixnums) => two_fixnums,
None => {
// Defer compilation so we can specialize on a runtime `self`
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ return jit.defer_compilation(asm);
}
};
if two_fixnums {
- if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS) {
+ if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS) {
return None;
}
// Check that both operands are fixnums
- guard_two_fixnums(jit, asm, ocb);
+ guard_two_fixnums(jit, asm);
// Get the operands and destination from the stack
let arg1 = asm.stack_pop(1);
@@ -3500,36 +4072,34 @@ fn gen_opt_minus(
// Push the output on the stack
let dst = asm.stack_push(Type::Fixnum);
- asm.store(dst, val);
+ asm.mov(dst, val);
Some(KeepCompiling)
} else {
// Delegate to send, call the method on the recv
- gen_opt_send_without_block(jit, asm, ocb)
+ gen_opt_send_without_block(jit, asm)
}
}
fn gen_opt_mult(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) {
Some(two_fixnums) => two_fixnums,
None => {
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ return jit.defer_compilation(asm);
}
};
// Fallback to a method call if it overflows
if two_fixnums && asm.ctx.get_chain_depth() == 0 {
- if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_MULT) {
+ if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, BOP_MULT) {
return None;
}
// Check that both operands are fixnums
- guard_two_fixnums(jit, asm, ocb);
+ guard_two_fixnums(jit, asm);
// Get the operands from the stack
let arg1 = asm.stack_pop(1);
@@ -3540,7 +4110,7 @@ fn gen_opt_mult(
let arg0_untag = asm.rshift(arg0, Opnd::UImm(1));
let arg1_untag = asm.sub(arg1, Opnd::UImm(1));
let out_val = asm.mul(arg0_untag, arg1_untag);
- jit_chain_guard(JCC_JO_MUL, jit, asm, ocb, 1, Counter::opt_mult_overflow);
+ jit_chain_guard(JCC_JO_MUL, jit, asm, 1, Counter::opt_mult_overflow);
let out_val = asm.add(out_val, Opnd::UImm(1));
// Push the output on the stack
@@ -3549,40 +4119,37 @@ fn gen_opt_mult(
Some(KeepCompiling)
} else {
- gen_opt_send_without_block(jit, asm, ocb)
+ gen_opt_send_without_block(jit, asm)
}
}
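// ---------------------------------------------------------------------------
// Editorial sketch (not part of this diff): the fixnum tagging arithmetic the
// opt_mult fast path above relies on. A fixnum n is encoded as 2*n + 1, so
// (a >> 1) * (b - 1) = n_a * (2*n_b) = 2*(n_a*n_b), and adding 1 re-tags the
// product. Plain Rust with i64s; checked_mul stands in for the JO guard.
fn tagged(n: i64) -> i64 { 2 * n + 1 }

fn fixnum_mul(a_tagged: i64, b_tagged: i64) -> Option<i64> {
    let lhs = a_tagged >> 1;          // untag: recover n_a
    let rhs = b_tagged - 1;           // 2 * n_b
    let prod = lhs.checked_mul(rhs)?; // overflow => side exit in the JIT
    Some(prod + 1)                    // re-tag the result
}

fn main() {
    assert_eq!(fixnum_mul(tagged(6), tagged(7)), Some(tagged(42)));
}
// ---------------------------------------------------------------------------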
fn gen_opt_div(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Delegate to send, call the method on the recv
- gen_opt_send_without_block(jit, asm, ocb)
+ gen_opt_send_without_block(jit, asm)
}
fn gen_opt_mod(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) {
Some(two_fixnums) => two_fixnums,
None => {
// Defer compilation so we can specialize on a runtime `self`
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ return jit.defer_compilation(asm);
}
};
if two_fixnums {
- if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_MOD) {
+ if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, BOP_MOD) {
return None;
}
// Check that both operands are fixnums
- guard_two_fixnums(jit, asm, ocb);
+ guard_two_fixnums(jit, asm);
// Get the operands and destination from the stack
let arg1 = asm.stack_pop(1);
@@ -3603,52 +4170,47 @@ fn gen_opt_mod(
Some(KeepCompiling)
} else {
// Delegate to send, call the method on the recv
- gen_opt_send_without_block(jit, asm, ocb)
+ gen_opt_send_without_block(jit, asm)
}
}
fn gen_opt_ltlt(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Delegate to send, call the method on the recv
- gen_opt_send_without_block(jit, asm, ocb)
+ gen_opt_send_without_block(jit, asm)
}
fn gen_opt_nil_p(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Delegate to send, call the method on the recv
- gen_opt_send_without_block(jit, asm, ocb)
+ gen_opt_send_without_block(jit, asm)
}
fn gen_opt_empty_p(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Delegate to send, call the method on the recv
- gen_opt_send_without_block(jit, asm, ocb)
+ gen_opt_send_without_block(jit, asm)
}
fn gen_opt_succ(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Delegate to send, call the method on the recv
- gen_opt_send_without_block(jit, asm, ocb)
+ gen_opt_send_without_block(jit, asm)
}
fn gen_opt_str_freeze(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
- if !assume_bop_not_redefined(jit, asm, ocb, STRING_REDEFINED_OP_FLAG, BOP_FREEZE) {
+ if !assume_bop_not_redefined(jit, asm, STRING_REDEFINED_OP_FLAG, BOP_FREEZE) {
return None;
}
@@ -3661,12 +4223,45 @@ fn gen_opt_str_freeze(
Some(KeepCompiling)
}
+fn gen_opt_ary_freeze(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+) -> Option<CodegenStatus> {
+ if !assume_bop_not_redefined(jit, asm, ARRAY_REDEFINED_OP_FLAG, BOP_FREEZE) {
+ return None;
+ }
+
+ let ary = jit.get_arg(0);
+
+ // Push the return value onto the stack
+ let stack_ret = asm.stack_push(Type::CArray);
+ asm.mov(stack_ret, ary.into());
+
+ Some(KeepCompiling)
+}
+
+fn gen_opt_hash_freeze(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+) -> Option<CodegenStatus> {
+ if !assume_bop_not_redefined(jit, asm, HASH_REDEFINED_OP_FLAG, BOP_FREEZE) {
+ return None;
+ }
+
+ let hash = jit.get_arg(0);
+
+ // Push the return value onto the stack
+ let stack_ret = asm.stack_push(Type::CHash);
+ asm.mov(stack_ret, hash.into());
+
+ Some(KeepCompiling)
+}
+
fn gen_opt_str_uminus(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
- if !assume_bop_not_redefined(jit, asm, ocb, STRING_REDEFINED_OP_FLAG, BOP_UMINUS) {
+ if !assume_bop_not_redefined(jit, asm, STRING_REDEFINED_OP_FLAG, BOP_UMINUS) {
return None;
}
@@ -3682,19 +4277,17 @@ fn gen_opt_str_uminus(
fn gen_opt_newarray_max(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let num = jit.get_arg(0).as_u32();
- // Save the PC and SP because we may allocate
- jit_prepare_routine_call(jit, asm);
+ // Save the PC and SP because we may call #max
+ jit_prepare_non_leaf_call(jit, asm);
extern "C" {
fn rb_vm_opt_newarray_max(ec: EcPtr, num: u32, elts: *const VALUE) -> VALUE;
}
- let offset_magnitude = (SIZEOF_VALUE as u32) * num;
- let values_opnd = asm.ctx.sp_opnd(-(offset_magnitude as isize));
+ let values_opnd = asm.ctx.sp_opnd(-(num as i32));
let values_ptr = asm.lea(values_opnd);
let val_opnd = asm.ccall(
@@ -3713,41 +4306,134 @@ fn gen_opt_newarray_max(
Some(KeepCompiling)
}
-fn gen_opt_newarray_send(
+fn gen_opt_duparray_send(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let method = jit.get_arg(1).as_u64();
- if method == ID!(min) {
- gen_opt_newarray_min(jit, asm, _ocb)
- } else if method == ID!(max) {
- gen_opt_newarray_max(jit, asm, _ocb)
- } else if method == ID!(hash) {
- gen_opt_newarray_hash(jit, asm, _ocb)
+ if method == ID!(include_p) {
+ gen_opt_duparray_send_include_p(jit, asm)
+ } else {
+ None
+ }
+}
+
+fn gen_opt_duparray_send_include_p(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+) -> Option<CodegenStatus> {
+ asm_comment!(asm, "opt_duparray_send include_p");
+
+ let ary = jit.get_arg(0);
+ let argc = jit.get_arg(2).as_usize();
+
+ // Save the PC and SP because we may call #include?
+ jit_prepare_non_leaf_call(jit, asm);
+
+ extern "C" {
+ fn rb_vm_opt_duparray_include_p(ec: EcPtr, ary: VALUE, target: VALUE) -> VALUE;
+ }
+
+ let target = asm.ctx.sp_opnd(-1);
+
+ let val_opnd = asm.ccall(
+ rb_vm_opt_duparray_include_p as *const u8,
+ vec![
+ EC,
+ ary.into(),
+ target,
+ ],
+ );
+
+ asm.stack_pop(argc);
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, val_opnd);
+
+ Some(KeepCompiling)
+}
+
+fn gen_opt_newarray_send(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+) -> Option<CodegenStatus> {
+ let method = jit.get_arg(1).as_u32();
+
+ if method == VM_OPT_NEWARRAY_SEND_MIN {
+ gen_opt_newarray_min(jit, asm)
+ } else if method == VM_OPT_NEWARRAY_SEND_MAX {
+ gen_opt_newarray_max(jit, asm)
+ } else if method == VM_OPT_NEWARRAY_SEND_HASH {
+ gen_opt_newarray_hash(jit, asm)
+ } else if method == VM_OPT_NEWARRAY_SEND_INCLUDE_P {
+ gen_opt_newarray_include_p(jit, asm)
+ } else if method == VM_OPT_NEWARRAY_SEND_PACK {
+ gen_opt_newarray_pack_buffer(jit, asm, 1, None)
+ } else if method == VM_OPT_NEWARRAY_SEND_PACK_BUFFER {
+ gen_opt_newarray_pack_buffer(jit, asm, 2, Some(1))
} else {
None
}
}
+fn gen_opt_newarray_pack_buffer(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ fmt_offset: u32,
+ buffer: Option<u32>,
+) -> Option<CodegenStatus> {
+ asm_comment!(asm, "opt_newarray_send pack");
+
+ let num = jit.get_arg(0).as_u32();
+
+ // Save the PC and SP because we may call #pack
+ jit_prepare_non_leaf_call(jit, asm);
+
+ extern "C" {
+ fn rb_vm_opt_newarray_pack_buffer(ec: EcPtr, num: u32, elts: *const VALUE, fmt: VALUE, buffer: VALUE) -> VALUE;
+ }
+
+ let values_opnd = asm.ctx.sp_opnd(-(num as i32));
+ let values_ptr = asm.lea(values_opnd);
+
+ let fmt_string = asm.ctx.sp_opnd(-(fmt_offset as i32));
+
+ let val_opnd = asm.ccall(
+ rb_vm_opt_newarray_pack_buffer as *const u8,
+ vec![
+ EC,
+ (num - fmt_offset).into(),
+ values_ptr,
+ fmt_string,
+ match buffer {
+ None => Qundef.into(),
+ Some(i) => asm.ctx.sp_opnd(-(i as i32)),
+ },
+ ],
+ );
+
+ asm.stack_pop(num.as_usize());
+ let stack_ret = asm.stack_push(Type::CString);
+ asm.mov(stack_ret, val_opnd);
+
+ Some(KeepCompiling)
+}
+
fn gen_opt_newarray_hash(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let num = jit.get_arg(0).as_u32();
- // Save the PC and SP because we may allocate
- jit_prepare_routine_call(jit, asm);
+ // Save the PC and SP because we may call #hash
+ jit_prepare_non_leaf_call(jit, asm);
extern "C" {
fn rb_vm_opt_newarray_hash(ec: EcPtr, num: u32, elts: *const VALUE) -> VALUE;
}
- let offset_magnitude = (SIZEOF_VALUE as u32) * num;
- let values_opnd = asm.ctx.sp_opnd(-(offset_magnitude as isize));
+ let values_opnd = asm.ctx.sp_opnd(-(num as i32));
let values_ptr = asm.lea(values_opnd);
let val_opnd = asm.ccall(
@@ -3766,23 +4452,57 @@ fn gen_opt_newarray_hash(
Some(KeepCompiling)
}
+fn gen_opt_newarray_include_p(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+) -> Option<CodegenStatus> {
+ asm_comment!(asm, "opt_newarray_send include?");
+
+ let num = jit.get_arg(0).as_u32();
+
+ // Save the PC and SP because we may call customized methods.
+ jit_prepare_non_leaf_call(jit, asm);
+
+ extern "C" {
+ fn rb_vm_opt_newarray_include_p(ec: EcPtr, num: u32, elts: *const VALUE, target: VALUE) -> VALUE;
+ }
+
+ let values_opnd = asm.ctx.sp_opnd(-(num as i32));
+ let values_ptr = asm.lea(values_opnd);
+ let target = asm.ctx.sp_opnd(-1);
+
+ let val_opnd = asm.ccall(
+ rb_vm_opt_newarray_include_p as *const u8,
+ vec![
+ EC,
+ (num - 1).into(),
+ values_ptr,
+ target
+ ],
+ );
+
+ asm.stack_pop(num.as_usize());
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, val_opnd);
+
+ Some(KeepCompiling)
+}
+
fn gen_opt_newarray_min(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let num = jit.get_arg(0).as_u32();
- // Save the PC and SP because we may allocate
- jit_prepare_routine_call(jit, asm);
+ // Save the PC and SP because we may call #min
+ jit_prepare_non_leaf_call(jit, asm);
extern "C" {
fn rb_vm_opt_newarray_min(ec: EcPtr, num: u32, elts: *const VALUE) -> VALUE;
}
- let offset_magnitude = (SIZEOF_VALUE as u32) * num;
- let values_opnd = asm.ctx.sp_opnd(-(offset_magnitude as isize));
+ let values_opnd = asm.ctx.sp_opnd(-(num as i32));
let values_ptr = asm.lea(values_opnd);
let val_opnd = asm.ccall(
@@ -3804,39 +4524,34 @@ fn gen_opt_newarray_min(
fn gen_opt_not(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
- return gen_opt_send_without_block(jit, asm, ocb);
+ return gen_opt_send_without_block(jit, asm);
}
fn gen_opt_size(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
- return gen_opt_send_without_block(jit, asm, ocb);
+ return gen_opt_send_without_block(jit, asm);
}
fn gen_opt_length(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
- return gen_opt_send_without_block(jit, asm, ocb);
+ return gen_opt_send_without_block(jit, asm);
}
fn gen_opt_regexpmatch2(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
- return gen_opt_send_without_block(jit, asm, ocb);
+ return gen_opt_send_without_block(jit, asm);
}
fn gen_opt_case_dispatch(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Normally this instruction would lookup the key in a hash and jump to an
// offset based on that.
@@ -3845,9 +4560,8 @@ fn gen_opt_case_dispatch(
// We'd hope that our jitted code will be sufficiently fast without the
// hash lookup, at least for small hashes, but it's worth revisiting this
// assumption in the future.
- if !jit.at_current_insn() {
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ if !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
}
let case_hash = jit.get_arg(0);
@@ -3877,8 +4591,14 @@ fn gen_opt_case_dispatch(
all_fixnum
}
- if comptime_key.fixnum_p() && comptime_key.0 <= u32::MAX.as_usize() && case_hash_all_fixnum_p(case_hash) {
- if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_EQQ) {
+ // If megamorphic, fallback to compiling branch instructions after opt_case_dispatch
+ let megamorphic = asm.ctx.get_chain_depth() >= CASE_WHEN_MAX_DEPTH;
+ if megamorphic {
+ gen_counter_incr(jit, asm, Counter::num_opt_case_dispatch_megamorphic);
+ }
+
+ if comptime_key.fixnum_p() && comptime_key.0 <= u32::MAX.as_usize() && case_hash_all_fixnum_p(case_hash) && !megamorphic {
+ if !assume_bop_not_redefined(jit, asm, INTEGER_REDEFINED_OP_FLAG, BOP_EQQ) {
return None;
}
@@ -3888,7 +4608,6 @@ fn gen_opt_case_dispatch(
JCC_JNE,
jit,
asm,
- ocb,
CASE_WHEN_MAX_DEPTH,
Counter::opt_case_dispatch_megamorphic,
);
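// ---------------------------------------------------------------------------
// Editorial sketch (not part of this diff): the conditions opt_case_dispatch
// checks before specializing. Every `when` key must be a fixnum small enough
// to compare with a single machine comparison, and the guard chain must not
// already be saturated; otherwise the branch instructions after the dispatch
// handle it. Hypothetical helper, not YJIT code.
fn should_specialize_case_dispatch(case_keys: &[i64], chain_depth: u8, max_depth: u8) -> bool {
    let all_small_fixnums = case_keys.iter().all(|&k| k >= 0 && k <= u32::MAX as i64);
    let megamorphic = chain_depth >= max_depth;
    all_small_fixnums && !megamorphic
}
// ---------------------------------------------------------------------------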
@@ -3918,7 +4637,6 @@ fn gen_opt_case_dispatch(
fn gen_branchif(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let jump_offset = jit.get_arg(0).as_i32();
@@ -3955,10 +4673,8 @@ fn gen_branchif(
// Generate the branch instructions
let ctx = asm.ctx;
- gen_branch(
- jit,
+ jit.gen_branch(
asm,
- ocb,
jump_block,
&ctx,
Some(next_block),
@@ -3973,7 +4689,6 @@ fn gen_branchif(
fn gen_branchunless(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let jump_offset = jit.get_arg(0).as_i32();
@@ -4011,10 +4726,8 @@ fn gen_branchunless(
// Generate the branch instructions
let ctx = asm.ctx;
- gen_branch(
- jit,
+ jit.gen_branch(
asm,
- ocb,
jump_block,
&ctx,
Some(next_block),
@@ -4029,7 +4742,6 @@ fn gen_branchunless(
fn gen_branchnil(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let jump_offset = jit.get_arg(0).as_i32();
@@ -4064,10 +4776,8 @@ fn gen_branchnil(
asm.cmp(val_opnd, Opnd::UImm(Qnil.into()));
// Generate the branch instructions
let ctx = asm.ctx;
- gen_branch(
- jit,
+ jit.gen_branch(
asm,
- ocb,
jump_block,
&ctx,
Some(next_block),
@@ -4082,23 +4792,22 @@ fn gen_branchnil(
fn gen_throw(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let throw_state = jit.get_arg(0).as_u64();
let throwobj = asm.stack_pop(1);
let throwobj = asm.load(throwobj);
// Gather some statistics about throw
- gen_counter_incr(asm, Counter::num_throw);
+ gen_counter_incr(jit, asm, Counter::num_throw);
match (throw_state & VM_THROW_STATE_MASK as u64) as u32 {
- RUBY_TAG_BREAK => gen_counter_incr(asm, Counter::num_throw_break),
- RUBY_TAG_RETRY => gen_counter_incr(asm, Counter::num_throw_retry),
- RUBY_TAG_RETURN => gen_counter_incr(asm, Counter::num_throw_return),
+ RUBY_TAG_BREAK => gen_counter_incr(jit, asm, Counter::num_throw_break),
+ RUBY_TAG_RETRY => gen_counter_incr(jit, asm, Counter::num_throw_retry),
+ RUBY_TAG_RETURN => gen_counter_incr(jit, asm, Counter::num_throw_return),
_ => {},
}
// THROW_DATA_NEW allocates. Save SP for GC and PC for allocation tracing as
- // well as handling the catch table. However, not using jit_prepare_routine_call
+ // well as handling the catch table. However, not using jit_prepare_call_with_gc
// since we don't need a patch point for this implementation.
jit_save_pc(jit, asm);
gen_save_sp(asm);
@@ -4122,10 +4831,72 @@ fn gen_throw(
Some(EndBlock)
}
+fn gen_opt_new(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+) -> Option<CodegenStatus> {
+ let cd = jit.get_arg(0).as_ptr();
+ let jump_offset = jit.get_arg(1).as_i32();
+
+ if !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
+ }
+
+ let ci = unsafe { get_call_data_ci(cd) }; // info about the call site
+ let mid = unsafe { vm_ci_mid(ci) };
+ let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap();
+
+ let recv_idx = argc;
+ let comptime_recv = jit.peek_at_stack(&asm.ctx, recv_idx as isize);
+
+ // This is a singleton class
+ let comptime_recv_klass = comptime_recv.class_of();
+
+ let recv = asm.stack_opnd(recv_idx);
+
+ perf_call!("opt_new: ", jit_guard_known_klass(
+ jit,
+ asm,
+ recv,
+ recv.into(),
+ comptime_recv,
+ SEND_MAX_DEPTH,
+ Counter::guard_send_klass_megamorphic,
+ ));
+
+ // We now know that it's always comptime_recv_klass
+ if jit.assume_expected_cfunc(asm, comptime_recv_klass, mid, rb_class_new_instance_pass_kw as _) {
+ // Fast path
+ // call rb_class_alloc to actually allocate
+ jit_prepare_non_leaf_call(jit, asm);
+ let obj = asm.ccall(rb_obj_alloc as _, vec![comptime_recv.into()]);
+
+ // Get a reference to the stack location where we need to save the
+ // return instance.
+ let result = asm.stack_opnd(recv_idx + 1);
+ let recv = asm.stack_opnd(recv_idx);
+
+ // Replace the receiver for the upcoming initialize call
+ asm.ctx.set_opnd_mapping(recv.into(), TempMapping::MapToStack(Type::UnknownHeap));
+ asm.mov(recv, obj);
+
+ // Save the allocated object for return
+ asm.ctx.set_opnd_mapping(result.into(), TempMapping::MapToStack(Type::UnknownHeap));
+ asm.mov(result, obj);
+
+ jump_to_next_insn(jit, asm)
+ } else {
+ // general case
+
+ // Get the branch target instruction offsets
+ let jump_idx = jit.next_insn_idx() as i32 + jump_offset;
+ return end_block_with_jump(jit, asm, jump_idx as u16);
+ }
+}
+
fn gen_jump(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let jump_offset = jit.get_arg(0).as_i32();
@@ -4156,19 +4927,28 @@ fn gen_jump(
fn jit_guard_known_klass(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
- known_klass: VALUE,
obj_opnd: Opnd,
insn_opnd: YARVOpnd,
sample_instance: VALUE,
- max_chain_depth: i32,
+ max_chain_depth: u8,
counter: Counter,
) {
+ let known_klass = sample_instance.class_of();
let val_type = asm.ctx.get_opnd_type(insn_opnd);
if val_type.known_class() == Some(known_klass) {
- // We already know from type information that this is a match
- return;
+ // Unless frozen, Array, Hash, and String objects may change their RBASIC_CLASS
+ // when they get a singleton class. Those types need invalidations.
+ if unsafe { [rb_cArray, rb_cHash, rb_cString].contains(&known_klass) } {
+ if jit.assume_no_singleton_class(asm, known_klass) {
+ // Speculate that this object will not have a singleton class,
+ // and invalidate the block in case it does.
+ return;
+ }
+ } else {
+ // We already know from type information that this is a match
+ return;
+ }
}
if unsafe { known_klass == rb_cNilClass } {
@@ -4177,7 +4957,7 @@ fn jit_guard_known_klass(
asm_comment!(asm, "guard object is nil");
asm.cmp(obj_opnd, Qnil.into());
- jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter);
+ jit_chain_guard(JCC_JNE, jit, asm, max_chain_depth, counter);
asm.ctx.upgrade_opnd_type(insn_opnd, Type::Nil);
} else if unsafe { known_klass == rb_cTrueClass } {
@@ -4186,7 +4966,7 @@ fn jit_guard_known_klass(
asm_comment!(asm, "guard object is true");
asm.cmp(obj_opnd, Qtrue.into());
- jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter);
+ jit_chain_guard(JCC_JNE, jit, asm, max_chain_depth, counter);
asm.ctx.upgrade_opnd_type(insn_opnd, Type::True);
} else if unsafe { known_klass == rb_cFalseClass } {
@@ -4196,7 +4976,7 @@ fn jit_guard_known_klass(
asm_comment!(asm, "guard object is false");
assert!(Qfalse.as_i32() == 0);
asm.test(obj_opnd, obj_opnd);
- jit_chain_guard(JCC_JNZ, jit, asm, ocb, max_chain_depth, counter);
+ jit_chain_guard(JCC_JNZ, jit, asm, max_chain_depth, counter);
asm.ctx.upgrade_opnd_type(insn_opnd, Type::False);
} else if unsafe { known_klass == rb_cInteger } && sample_instance.fixnum_p() {
@@ -4206,7 +4986,7 @@ fn jit_guard_known_klass(
asm_comment!(asm, "guard object is fixnum");
asm.test(obj_opnd, Opnd::Imm(RUBY_FIXNUM_FLAG as i64));
- jit_chain_guard(JCC_JZ, jit, asm, ocb, max_chain_depth, counter);
+ jit_chain_guard(JCC_JZ, jit, asm, max_chain_depth, counter);
asm.ctx.upgrade_opnd_type(insn_opnd, Type::Fixnum);
} else if unsafe { known_klass == rb_cSymbol } && sample_instance.static_sym_p() {
assert!(!val_type.is_heap());
@@ -4218,7 +4998,7 @@ fn jit_guard_known_klass(
asm_comment!(asm, "guard object is static symbol");
assert!(RUBY_SPECIAL_SHIFT == 8);
asm.cmp(obj_opnd.with_num_bits(8).unwrap(), Opnd::UImm(RUBY_SYMBOL_FLAG as u64));
- jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter);
+ jit_chain_guard(JCC_JNE, jit, asm, max_chain_depth, counter);
asm.ctx.upgrade_opnd_type(insn_opnd, Type::ImmSymbol);
}
} else if unsafe { known_klass == rb_cFloat } && sample_instance.flonum_p() {
@@ -4230,7 +5010,7 @@ fn jit_guard_known_klass(
asm_comment!(asm, "guard object is flonum");
let flag_bits = asm.and(obj_opnd, Opnd::UImm(RUBY_FLONUM_MASK as u64));
asm.cmp(flag_bits, Opnd::UImm(RUBY_FLONUM_FLAG as u64));
- jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter);
+ jit_chain_guard(JCC_JNE, jit, asm, max_chain_depth, counter);
asm.ctx.upgrade_opnd_type(insn_opnd, Type::Flonum);
}
} else if unsafe {
@@ -4252,23 +5032,23 @@ fn jit_guard_known_klass(
// IO#reopen can be used to change the class and singleton class of IO objects!
asm_comment!(asm, "guard known object with singleton class");
asm.cmp(obj_opnd, sample_instance.into());
- jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter);
+ jit_chain_guard(JCC_JNE, jit, asm, max_chain_depth, counter);
} else if val_type == Type::CString && unsafe { known_klass == rb_cString } {
// guard elided because the context says we've already checked
unsafe {
assert_eq!(sample_instance.class_of(), rb_cString, "context says class is exactly ::String")
};
} else {
- assert!(!val_type.is_imm());
+ assert!(!val_type.is_imm(), "{insn_opnd:?} should be a heap object, but was {val_type:?} for {sample_instance:?}");
// Check that the receiver is a heap object
// Note: if we get here, the class doesn't have immediate instances.
if !val_type.is_heap() {
asm_comment!(asm, "guard not immediate");
asm.test(obj_opnd, (RUBY_IMMEDIATE_MASK as u64).into());
- jit_chain_guard(JCC_JNZ, jit, asm, ocb, max_chain_depth, counter);
+ jit_chain_guard(JCC_JNZ, jit, asm, max_chain_depth, counter);
asm.cmp(obj_opnd, Qfalse.into());
- jit_chain_guard(JCC_JE, jit, asm, ocb, max_chain_depth, counter);
+ jit_chain_guard(JCC_JE, jit, asm, max_chain_depth, counter);
asm.ctx.upgrade_opnd_type(insn_opnd, Type::UnknownHeap);
}
@@ -4284,17 +5064,14 @@ fn jit_guard_known_klass(
// TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the class.
asm_comment!(asm, "guard known class");
asm.cmp(klass_opnd, known_klass.into());
- jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter);
+ jit_chain_guard(JCC_JNE, jit, asm, max_chain_depth, counter);
if known_klass == unsafe { rb_cString } {
- // Upgrading to Type::CString here is incorrect.
- // The guard we put only checks RBASIC_CLASS(obj),
- // which adding a singleton class can change. We
- // additionally need to know the string is frozen
- // to claim Type::CString.
- asm.ctx.upgrade_opnd_type(insn_opnd, Type::TString);
+ asm.ctx.upgrade_opnd_type(insn_opnd, Type::CString);
} else if known_klass == unsafe { rb_cArray } {
- asm.ctx.upgrade_opnd_type(insn_opnd, Type::TArray);
+ asm.ctx.upgrade_opnd_type(insn_opnd, Type::CArray);
+ } else if known_klass == unsafe { rb_cHash } {
+ asm.ctx.upgrade_opnd_type(insn_opnd, Type::CHash);
}
}
}
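// ---------------------------------------------------------------------------
// Editorial sketch (not part of this diff): the immediate-value tag tests that
// jit_guard_known_klass emits for fixnum, flonum, and static-symbol receivers,
// written as plain Rust predicates. The tag constants below are what I believe
// a 64-bit flonum build of CRuby uses; treat them as assumptions rather than
// authoritative values.
const FIXNUM_FLAG: u64 = 0x1;  // lowest bit set => Fixnum
const FLONUM_MASK: u64 = 0x3;  // lowest two bits == 0b10 => Flonum
const FLONUM_FLAG: u64 = 0x2;
const SYMBOL_FLAG: u64 = 0x0c; // lowest byte == 0x0c => static Symbol

fn is_fixnum(v: u64) -> bool { v & FIXNUM_FLAG != 0 }
fn is_flonum(v: u64) -> bool { v & FLONUM_MASK == FLONUM_FLAG }
fn is_static_symbol(v: u64) -> bool { (v & 0xff) == SYMBOL_FLAG }
// ---------------------------------------------------------------------------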
@@ -4327,12 +5104,11 @@ fn jit_protected_callee_ancestry_guard(
fn jit_rb_obj_not(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
let recv_opnd = asm.ctx.get_opnd_type(StackOpnd(0));
@@ -4362,12 +5138,11 @@ fn jit_rb_obj_not(
fn jit_rb_true(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
asm_comment!(asm, "nil? == true");
asm.stack_pop(1);
@@ -4380,12 +5155,11 @@ fn jit_rb_true(
fn jit_rb_false(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
asm_comment!(asm, "nil? == false");
asm.stack_pop(1);
@@ -4398,19 +5172,18 @@ fn jit_rb_false(
fn jit_rb_kernel_is_a(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
argc: i32,
- known_recv_class: *const VALUE,
+ known_recv_class: Option<VALUE>,
) -> bool {
if argc != 1 {
return false;
}
// If this is a super call we might not know the class
- if known_recv_class.is_null() {
+ if known_recv_class.is_none() {
return false;
}
@@ -4451,19 +5224,18 @@ fn jit_rb_kernel_is_a(
fn jit_rb_kernel_instance_of(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
argc: i32,
- known_recv_class: *const VALUE,
+ known_recv_class: Option<VALUE>,
) -> bool {
if argc != 1 {
return false;
}
// If this is a super call we might not know the class
- if known_recv_class.is_null() {
+ if known_recv_class.is_none() {
return false;
}
@@ -4496,7 +5268,6 @@ fn jit_rb_kernel_instance_of(
JCC_JNE,
jit,
asm,
- ocb,
SEND_MAX_DEPTH,
Counter::guard_send_instance_of_class_mismatch,
);
@@ -4516,12 +5287,11 @@ fn jit_rb_kernel_instance_of(
fn jit_rb_mod_eqq(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
if argc != 1 {
return false;
@@ -4530,7 +5300,7 @@ fn jit_rb_mod_eqq(
asm_comment!(asm, "Module#===");
// By being here, we know that the receiver is a T_MODULE or a T_CLASS, because Module#=== can
// only live on these objects. With that, we can call rb_obj_is_kind_of() without
- // jit_prepare_routine_call() or a control frame push because it can't raise, allocate, or call
+ // jit_prepare_non_leaf_call() or a control frame push because it can't raise, allocate, or call
// Ruby methods with these inputs.
// Note the difference in approach from Kernel#is_a? because we don't get a free guard for the
// right hand side.
@@ -4545,17 +5315,43 @@ fn jit_rb_mod_eqq(
return true;
}
+// Substitution for rb_mod_name(). Returns the name of a module/class.
+fn jit_rb_mod_name(
+ _jit: &mut JITState,
+ asm: &mut Assembler,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ if argc != 0 {
+ return false;
+ }
+
+ asm_comment!(asm, "Module#name");
+
+ // rb_mod_name() never allocates, so no preparation needed.
+ let name = asm.ccall(rb_mod_name as _, vec![asm.stack_opnd(0)]);
+
+ let _ = asm.stack_pop(1); // pop self
+ // call-seq: mod.name -> string or nil
+ let ret = asm.stack_push(Type::Unknown);
+ asm.mov(ret, name);
+
+ true
+}
+
// Codegen for rb_obj_equal()
// object identity comparison
fn jit_rb_obj_equal(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
asm_comment!(asm, "equal?");
let obj1 = asm.stack_pop(1);
@@ -4574,29 +5370,27 @@ fn jit_rb_obj_equal(
fn jit_rb_obj_not_equal(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
- gen_equality_specialized(jit, asm, ocb, false) == Some(true)
+ gen_equality_specialized(jit, asm, false) == Some(true)
}
// Codegen for rb_int_equal()
fn jit_rb_int_equal(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
// Check that both operands are fixnums
- guard_two_fixnums(jit, asm, ocb);
+ guard_two_fixnums(jit, asm);
// Compare the arguments
asm_comment!(asm, "rb_int_equal");
@@ -4613,12 +5407,11 @@ fn jit_rb_int_equal(
fn jit_rb_int_succ(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
// Guard the receiver is fixnum
let recv_type = asm.ctx.get_opnd_type(StackOpnd(0));
@@ -4640,23 +5433,51 @@ fn jit_rb_int_succ(
true
}
+fn jit_rb_int_pred(
+ _jit: &mut JITState,
+ asm: &mut Assembler,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ // Guard the receiver is fixnum
+ let recv_type = asm.ctx.get_opnd_type(StackOpnd(0));
+ let recv = asm.stack_pop(1);
+ if recv_type != Type::Fixnum {
+ asm_comment!(asm, "guard object is fixnum");
+ asm.test(recv, Opnd::Imm(RUBY_FIXNUM_FLAG as i64));
+ asm.jz(Target::side_exit(Counter::send_pred_not_fixnum));
+ }
+
+ asm_comment!(asm, "Integer#pred");
+ let out_val = asm.sub(recv, Opnd::Imm(2)); // 2 is untagged Fixnum 1
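+    // The subtraction only overflows when the receiver is the minimum fixnum, so side-exit and let the interpreter produce the Bignum result.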
+ asm.jo(Target::side_exit(Counter::send_pred_underflow));
+
+ // Push the output onto the stack
+ let dst = asm.stack_push(Type::Fixnum);
+ asm.mov(dst, out_val);
+
+ true
+}
+
fn jit_rb_int_div(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
if asm.ctx.two_fixnums_on_stack(jit) != Some(true) {
return false;
}
- guard_two_fixnums(jit, asm, ocb);
+ guard_two_fixnums(jit, asm);
// rb_fix_div_fix may GC-allocate for Bignum
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_call_with_gc(jit, asm);
asm_comment!(asm, "Integer#/");
let obj = asm.stack_opnd(0);
@@ -4677,17 +5498,16 @@ fn jit_rb_int_div(
fn jit_rb_int_lshift(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
if asm.ctx.two_fixnums_on_stack(jit) != Some(true) {
return false;
}
- guard_two_fixnums(jit, asm, ocb);
+ guard_two_fixnums(jit, asm);
let comptime_shift = jit.peek_at_stack(&asm.ctx, 0);
@@ -4702,7 +5522,9 @@ fn jit_rb_int_lshift(
}
// Fallback to a C call if the shift amount varies
- if asm.ctx.get_chain_depth() > 1 {
+ // This check is needed because the chain guard will side-exit
+ // if its max depth is reached
+ if asm.ctx.get_chain_depth() > 0 {
return false;
}
@@ -4715,13 +5537,17 @@ fn jit_rb_int_lshift(
JCC_JNE,
jit,
asm,
- ocb,
- 2, // defer_compilation increments chain_depth
+ 1,
Counter::lshift_amount_changed,
);
+ fixnum_left_shift_body(asm, lhs, shift_amt as u64);
+ true
+}
+
+fn fixnum_left_shift_body(asm: &mut Assembler, lhs: Opnd, shift_amt: u64) {
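+    // lhs is a tagged fixnum ((n << 1) | 1); subtracting 1 clears the tag bit so the shift operates on 2*n.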
let in_val = asm.sub(lhs, 1.into());
- let shift_opnd = Opnd::UImm(shift_amt as u64);
+ let shift_opnd = Opnd::UImm(shift_amt);
let out_val = asm.lshift(in_val, shift_opnd);
let unshifted = asm.rshift(out_val, shift_opnd);
@@ -4734,18 +5560,93 @@ fn jit_rb_int_lshift(
let ret_opnd = asm.stack_push(Type::Fixnum);
asm.mov(ret_opnd, out_val);
+}
+
+fn jit_rb_int_rshift(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ if asm.ctx.two_fixnums_on_stack(jit) != Some(true) {
+ return false;
+ }
+ guard_two_fixnums(jit, asm);
+
+ let comptime_shift = jit.peek_at_stack(&asm.ctx, 0);
+
+ // Untag the fixnum shift amount
+ let shift_amt = comptime_shift.as_isize() >> 1;
+ if shift_amt > 63 || shift_amt < 0 {
+ return false;
+ }
+
+ // Fallback to a C call if the shift amount varies
+ // This check is needed because the chain guard will side-exit
+ // if its max depth is reached
+ if asm.ctx.get_chain_depth() > 0 {
+ return false;
+ }
+
+ let rhs = asm.stack_pop(1);
+ let lhs = asm.stack_pop(1);
+
+ // Guard on the shift amount we speculated on
+ asm.cmp(rhs, comptime_shift.into());
+ jit_chain_guard(
+ JCC_JNE,
+ jit,
+ asm,
+ 1,
+ Counter::rshift_amount_changed,
+ );
+
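+    // Shift the tagged fixnum and OR the tag bit back on; this gives the tagged result without untagging first.
+    // e.g. receiver 5 is tagged as 11, and (11 >> 1) | 1 == 5, which is the tagged form of 2 (5 >> 1).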
+ let shift_opnd = Opnd::UImm(shift_amt as u64);
+ let out_val = asm.rshift(lhs, shift_opnd);
+ let out_val = asm.or(out_val, 1.into());
+
+ let ret_opnd = asm.stack_push(Type::Fixnum);
+ asm.mov(ret_opnd, out_val);
+ true
+}
+
+fn jit_rb_int_xor(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ if asm.ctx.two_fixnums_on_stack(jit) != Some(true) {
+ return false;
+ }
+ guard_two_fixnums(jit, asm);
+
+ let rhs = asm.stack_pop(1);
+ let lhs = asm.stack_pop(1);
+
+ // XOR and then re-tag the resulting fixnum
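+    // (the tag bits cancel out since 1 ^ 1 == 0, so OR-ing 1 back on restores a valid fixnum;
+    // e.g. 3 ^ 5: the tagged operands give 7 ^ 11 == 12, and 12 | 1 == 13, the tagged form of 6)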
+ let out_val = asm.xor(lhs, rhs);
+ let out_val = asm.or(out_val, 1.into());
+
+ let ret_opnd = asm.stack_push(Type::Fixnum);
+ asm.mov(ret_opnd, out_val);
true
}
fn jit_rb_int_aref(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
if argc != 1 {
return false;
@@ -4753,7 +5654,7 @@ fn jit_rb_int_aref(
if asm.ctx.two_fixnums_on_stack(jit) != Some(true) {
return false;
}
- guard_two_fixnums(jit, asm, ocb);
+ guard_two_fixnums(jit, asm);
asm_comment!(asm, "Integer#[]");
let obj = asm.stack_pop(1);
@@ -4766,16 +5667,179 @@ fn jit_rb_int_aref(
true
}
+fn jit_rb_float_plus(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin
+ let comptime_obj = jit.peek_at_stack(&asm.ctx, 0);
+ if comptime_obj.fixnum_p() || comptime_obj.flonum_p() {
+ let obj = asm.stack_opnd(0);
+ jit_guard_known_klass(
+ jit,
+ asm,
+ obj,
+ obj.into(),
+ comptime_obj,
+ SEND_MAX_DEPTH,
+ Counter::guard_send_not_fixnum_or_flonum,
+ );
+ } else {
+ return false;
+ }
+
+ // Save the PC and SP because the callee may allocate Float on heap
+ jit_prepare_call_with_gc(jit, asm);
+
+ asm_comment!(asm, "Float#+");
+ let obj = asm.stack_opnd(0);
+ let recv = asm.stack_opnd(1);
+
+ let ret = asm.ccall(rb_float_plus as *const u8, vec![recv, obj]);
+ asm.stack_pop(2); // Keep recv during ccall for GC
+
+ let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float
+ asm.mov(ret_opnd, ret);
+ true
+}
+
+fn jit_rb_float_minus(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin
+ let comptime_obj = jit.peek_at_stack(&asm.ctx, 0);
+ if comptime_obj.fixnum_p() || comptime_obj.flonum_p() {
+ let obj = asm.stack_opnd(0);
+ jit_guard_known_klass(
+ jit,
+ asm,
+ obj,
+ obj.into(),
+ comptime_obj,
+ SEND_MAX_DEPTH,
+ Counter::guard_send_not_fixnum_or_flonum,
+ );
+ } else {
+ return false;
+ }
+
+ // Save the PC and SP because the callee may allocate Float on heap
+ jit_prepare_call_with_gc(jit, asm);
+
+ asm_comment!(asm, "Float#-");
+ let obj = asm.stack_opnd(0);
+ let recv = asm.stack_opnd(1);
+
+ let ret = asm.ccall(rb_float_minus as *const u8, vec![recv, obj]);
+ asm.stack_pop(2); // Keep recv during ccall for GC
+
+ let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float
+ asm.mov(ret_opnd, ret);
+ true
+}
+
+fn jit_rb_float_mul(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin
+ let comptime_obj = jit.peek_at_stack(&asm.ctx, 0);
+ if comptime_obj.fixnum_p() || comptime_obj.flonum_p() {
+ let obj = asm.stack_opnd(0);
+ jit_guard_known_klass(
+ jit,
+ asm,
+ obj,
+ obj.into(),
+ comptime_obj,
+ SEND_MAX_DEPTH,
+ Counter::guard_send_not_fixnum_or_flonum,
+ );
+ } else {
+ return false;
+ }
+
+ // Save the PC and SP because the callee may allocate Float on heap
+ jit_prepare_call_with_gc(jit, asm);
+
+ asm_comment!(asm, "Float#*");
+ let obj = asm.stack_opnd(0);
+ let recv = asm.stack_opnd(1);
+
+ let ret = asm.ccall(rb_float_mul as *const u8, vec![recv, obj]);
+ asm.stack_pop(2); // Keep recv during ccall for GC
+
+ let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float
+ asm.mov(ret_opnd, ret);
+ true
+}
+
+fn jit_rb_float_div(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin
+ let comptime_obj = jit.peek_at_stack(&asm.ctx, 0);
+ if comptime_obj.fixnum_p() || comptime_obj.flonum_p() {
+ let obj = asm.stack_opnd(0);
+ jit_guard_known_klass(
+ jit,
+ asm,
+ obj,
+ obj.into(),
+ comptime_obj,
+ SEND_MAX_DEPTH,
+ Counter::guard_send_not_fixnum_or_flonum,
+ );
+ } else {
+ return false;
+ }
+
+ // Save the PC and SP because the callee may allocate Float on heap
+ jit_prepare_call_with_gc(jit, asm);
+
+ asm_comment!(asm, "Float#/");
+ let obj = asm.stack_opnd(0);
+ let recv = asm.stack_opnd(1);
+
+ let ret = asm.ccall(rb_float_div as *const u8, vec![recv, obj]);
+ asm.stack_pop(2); // Keep recv during ccall for GC
+
+ let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float
+ asm.mov(ret_opnd, ret);
+ true
+}
+
/// If string is frozen, duplicate it to get a non-frozen string. Otherwise, return it.
fn jit_rb_str_uplus(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool
{
if argc != 0 {
@@ -4783,14 +5847,14 @@ fn jit_rb_str_uplus(
}
// We allocate when we dup the string
- jit_prepare_routine_call(jit, asm);
- asm.spill_temps(); // For ccall. Unconditionally spill them for RegTemps consistency.
+ jit_prepare_call_with_gc(jit, asm);
+ asm.spill_regs(); // For ccall. Unconditionally spill them for RegMappings consistency.
asm_comment!(asm, "Unary plus on string");
let recv_opnd = asm.stack_pop(1);
let recv_opnd = asm.load(recv_opnd);
let flags_opnd = asm.load(Opnd::mem(64, recv_opnd, RUBY_OFFSET_RBASIC_FLAGS));
- asm.test(flags_opnd, Opnd::Imm(RUBY_FL_FREEZE as i64));
+ asm.test(flags_opnd, Opnd::Imm(RUBY_FL_FREEZE as i64 | RSTRING_CHILLED as i64));
let ret_label = asm.new_label("stack_ret");
@@ -4802,7 +5866,7 @@ fn jit_rb_str_uplus(
asm.jz(ret_label);
// Str is frozen - duplicate it
- asm.spill_temps(); // for ccall
+ asm.spill_regs(); // for ccall
let ret_opnd = asm.ccall(rb_str_dup as *const u8, vec![recv_opnd]);
asm.mov(stack_ret, ret_opnd);
@@ -4814,12 +5878,11 @@ fn jit_rb_str_uplus(
fn jit_rb_str_length(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
asm_comment!(asm, "String#length");
extern "C" {
@@ -4841,12 +5904,11 @@ fn jit_rb_str_length(
fn jit_rb_str_bytesize(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
asm_comment!(asm, "String#bytesize");
@@ -4870,29 +5932,214 @@ fn jit_rb_str_bytesize(
true
}
+fn jit_rb_str_byteslice(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ci: *const rb_callinfo,
+ cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ if argc != 2 {
+ return false
+ }
+
+ // rb_str_byte_substr should be leaf if indexes are fixnums
+ match (asm.ctx.get_opnd_type(StackOpnd(0)), asm.ctx.get_opnd_type(StackOpnd(1))) {
+ (Type::Fixnum, Type::Fixnum) => {},
+ // Raises when non-integers are passed in, which requires the method frame
+ // to be pushed for the backtrace
+ _ => if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(2)) {
+ return false;
+ }
+ }
+ asm_comment!(asm, "String#byteslice");
+
+ // rb_str_byte_substr allocates a substring
+ jit_prepare_call_with_gc(jit, asm);
+
+ // Get stack operands after potential SP change
+ let len = asm.stack_opnd(0);
+ let beg = asm.stack_opnd(1);
+ let recv = asm.stack_opnd(2);
+
+ let ret_opnd = asm.ccall(rb_str_byte_substr as *const u8, vec![recv, beg, len]);
+ asm.stack_pop(3);
+
+ let out_opnd = asm.stack_push(Type::Unknown);
+ asm.mov(out_opnd, ret_opnd);
+
+ true
+}
+
+fn jit_rb_str_aref_m(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ // In yjit-bench the most common usages by far are single fixnum or two fixnums.
+ // rb_str_substr should be leaf if indexes are fixnums
+ if argc == 2 {
+ match (asm.ctx.get_opnd_type(StackOpnd(0)), asm.ctx.get_opnd_type(StackOpnd(1))) {
+ (Type::Fixnum, Type::Fixnum) => {},
+            // There is a two-argument form of (Regexp, Fixnum) which needs a different C func.
+ // Other types will raise.
+ _ => { return false },
+ }
+ } else if argc == 1 {
+ match asm.ctx.get_opnd_type(StackOpnd(0)) {
+ Type::Fixnum => {},
+            // Besides Fixnum, this could also be a Range or a Regexp, which are handled by separate C funcs.
+ // Other types will raise.
+ _ => {
+ // If the context doesn't have the type info we try a little harder.
+ let comptime_arg = jit.peek_at_stack(&asm.ctx, 0);
+ let arg0 = asm.stack_opnd(0);
+ if comptime_arg.fixnum_p() {
+ asm.test(arg0, Opnd::UImm(RUBY_FIXNUM_FLAG as u64));
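+                    // The test sets ZF when the fixnum tag bit is clear, so the JCC_JZ chain guard below branches away when the argument is not a fixnum.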
+
+ jit_chain_guard(
+ JCC_JZ,
+ jit,
+ asm,
+ SEND_MAX_DEPTH,
+ Counter::guard_send_str_aref_not_fixnum,
+ );
+ } else {
+ return false
+ }
+ },
+ }
+ } else {
+ return false
+ }
+
+ asm_comment!(asm, "String#[]");
+
+ // rb_str_substr allocates a substring
+ jit_prepare_call_with_gc(jit, asm);
+
+ // Get stack operands after potential SP change
+
+ // The "empty" arg distinguishes between the normal "one arg" behavior
+ // and the "two arg" special case that returns an empty string
+ // when the begin index is the length of the string.
+ // See the usages of rb_str_substr in string.c for more information.
+ let (beg_idx, empty, len) = if argc == 2 {
+ (1, Opnd::Imm(1), asm.stack_opnd(0))
+ } else {
+ // If there is only one arg, the length will be 1.
+ (0, Opnd::Imm(0), VALUE::fixnum_from_usize(1).into())
+ };
+
+ let beg = asm.stack_opnd(beg_idx);
+ let recv = asm.stack_opnd(beg_idx + 1);
+
+ let ret_opnd = asm.ccall(rb_str_substr_two_fixnums as *const u8, vec![recv, beg, len, empty]);
+ asm.stack_pop(beg_idx as usize + 2);
+
+ let out_opnd = asm.stack_push(Type::Unknown);
+ asm.mov(out_opnd, ret_opnd);
+
+ true
+}
+
fn jit_rb_str_getbyte(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
asm_comment!(asm, "String#getbyte");
- extern "C" {
- fn rb_str_getbyte(str: VALUE, index: VALUE) -> VALUE;
- }
- // Raises when non-integers are passed in
- jit_prepare_routine_call(jit, asm);
- let index = asm.stack_opnd(0);
+ // Don't pop since we may bail
+ let idx = asm.stack_opnd(0);
let recv = asm.stack_opnd(1);
- let ret_opnd = asm.ccall(rb_str_getbyte as *const u8, vec![recv, index]);
+
+ let comptime_idx = jit.peek_at_stack(&asm.ctx, 0);
+    if comptime_idx.fixnum_p() {
+ jit_guard_known_klass(
+ jit,
+ asm,
+ idx,
+ idx.into(),
+ comptime_idx,
+ SEND_MAX_DEPTH,
+ Counter::getbyte_idx_not_fixnum,
+ );
+ } else {
+ return false;
+ }
+
+ // Untag the index
+ let idx = asm.rshift(idx, Opnd::UImm(1));
+
+ // If index is negative, exit
+ asm.cmp(idx, Opnd::UImm(0));
+ asm.jl(Target::side_exit(Counter::getbyte_idx_negative));
+
+ asm_comment!(asm, "get string length");
+ let recv = asm.load(recv);
+ let str_len_opnd = Opnd::mem(
+ std::os::raw::c_long::BITS as u8,
+ asm.load(recv),
+ RUBY_OFFSET_RSTRING_LEN as i32,
+ );
+
+ // Exit if the index is out of bounds
+ asm.cmp(idx, str_len_opnd);
+ asm.jge(Target::side_exit(Counter::getbyte_idx_out_of_bounds));
+
+ let str_ptr = get_string_ptr(asm, recv);
+ // FIXME: could use SIB indexing here with proper support in backend
+ let str_ptr = asm.add(str_ptr, idx);
+ let byte = asm.load(Opnd::mem(8, str_ptr, 0));
+
+ // Zero-extend the byte to 64 bits
+ let byte = byte.with_num_bits(64).unwrap();
+ let byte = asm.and(byte, 0xFF.into());
+
+ // Tag the byte
+ let byte = asm.lshift(byte, Opnd::UImm(1));
+ let byte = asm.or(byte, Opnd::UImm(1));
+
asm.stack_pop(2); // Keep them on stack during ccall for GC
+ let out_opnd = asm.stack_push(Type::Fixnum);
+ asm.mov(out_opnd, byte);
+
+ true
+}
+
+fn jit_rb_str_setbyte(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ci: *const rb_callinfo,
+ cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ // Raises when index is out of range. Lazily push a frame in that case.
+ if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(2)) {
+ return false;
+ }
+ asm_comment!(asm, "String#setbyte");
+
+ let value = asm.stack_opnd(0);
+ let index = asm.stack_opnd(1);
+ let recv = asm.stack_opnd(2);
+
+ let ret_opnd = asm.ccall(rb_str_setbyte as *const u8, vec![recv, index, value]);
+ asm.stack_pop(3); // Keep them on stack during ccall for GC
- // Can either return a FIXNUM or nil
let out_opnd = asm.stack_push(Type::UnknownImm);
asm.mov(out_opnd, ret_opnd);
@@ -4906,14 +6153,13 @@ fn jit_rb_str_getbyte(
fn jit_rb_str_to_s(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- known_recv_class: *const VALUE,
+ known_recv_class: Option<VALUE>,
) -> bool {
- if !known_recv_class.is_null() && unsafe { *known_recv_class == rb_cString } {
+ if unsafe { known_recv_class == Some(rb_cString) } {
asm_comment!(asm, "to_s on plain string");
// The method returns the receiver, which is already on the stack.
// No stack movement.
@@ -4922,16 +6168,50 @@ fn jit_rb_str_to_s(
false
}
+fn jit_rb_str_dup(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ known_recv_class: Option<VALUE>,
+) -> bool {
+ // We specialize only the BARE_STRING_P case. Otherwise it's not leaf.
+ if unsafe { known_recv_class != Some(rb_cString) } {
+ return false;
+ }
+ asm_comment!(asm, "String#dup");
+
+ jit_prepare_call_with_gc(jit, asm);
+
+ let recv_opnd = asm.stack_opnd(0);
+ let recv_opnd = asm.load(recv_opnd);
+
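+    // A string whose shape carries instance variables is not a bare string, so side-exit instead of handling that case here.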
+ let shape_id_offset = unsafe { rb_shape_id_offset() };
+ let shape_opnd = Opnd::mem(64, recv_opnd, shape_id_offset);
+ asm.test(shape_opnd, Opnd::UImm(SHAPE_ID_HAS_IVAR_MASK as u64));
+ asm.jnz(Target::side_exit(Counter::send_str_dup_exivar));
+
+ // Call rb_str_dup
+ let ret_opnd = asm.ccall(rb_str_dup as *const u8, vec![recv_opnd]);
+
+ asm.stack_pop(1);
+ let stack_ret = asm.stack_push(Type::CString);
+ asm.mov(stack_ret, ret_opnd);
+
+ true
+}
+
// Codegen for rb_str_empty_p()
fn jit_rb_str_empty_p(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
let recv_opnd = asm.stack_pop(1);
@@ -4950,24 +6230,59 @@ fn jit_rb_str_empty_p(
return true;
}
-// Codegen for rb_str_concat() -- *not* String#concat
-// Frequently strings are concatenated using "out_str << next_str".
-// This is common in Erb and similar templating languages.
-fn jit_rb_str_concat(
+// Codegen for rb_str_concat() with an integer argument -- *not* String#concat
+// Using strings as a byte buffer often includes appending byte values to the end of the string.
+fn jit_rb_str_concat_codepoint(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ asm_comment!(asm, "String#<< with codepoint argument");
+
+ // Either of the string concatenation functions we call will reallocate the string to grow its
+ // capacity if necessary. In extremely rare cases (i.e., string exceeds `LONG_MAX` bytes),
+ // either of the called functions will raise an exception.
+ jit_prepare_non_leaf_call(jit, asm);
+
+ let codepoint = asm.stack_opnd(0);
+ let recv = asm.stack_opnd(1);
+
+ guard_object_is_fixnum(jit, asm, codepoint, StackOpnd(0));
+
+ asm.ccall(rb_jit_str_concat_codepoint as *const u8, vec![recv, codepoint]);
+
+ // The receiver is the return value, so we only need to pop the codepoint argument off the stack.
+ // We can reuse the receiver slot in the stack as the return value.
+ asm.stack_pop(1);
+
+ true
+}
+
+// Codegen for rb_str_concat() -- *not* String#concat
+// Frequently strings are concatenated using "out_str << next_str".
+// This is common in Erb and similar templating languages.
+fn jit_rb_str_concat(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ci: *const rb_callinfo,
+ cme: *const rb_callable_method_entry_t,
+ block: Option<BlockHandler>,
+ argc: i32,
+ known_recv_class: Option<VALUE>,
) -> bool {
// The << operator can accept integer codepoints for characters
// as the argument. We only specially optimise string arguments.
// If the peeked-at compile time argument is something other than
// a string, assume it won't be a string later either.
let comptime_arg = jit.peek_at_stack(&asm.ctx, 0);
+ if unsafe { RB_TYPE_P(comptime_arg, RUBY_T_FIXNUM) } {
+ return jit_rb_str_concat_codepoint(jit, asm, ci, cme, block, argc, known_recv_class);
+ }
+
if ! unsafe { RB_TYPE_P(comptime_arg, RUBY_T_STRING) } {
return false;
}
@@ -4975,11 +6290,18 @@ fn jit_rb_str_concat(
// Guard that the concat argument is a string
guard_object_is_string(asm, asm.stack_opnd(0), StackOpnd(0), Counter::guard_send_not_string);
- // Guard buffers from GC since rb_str_buf_append may allocate. During the VM lock on GC,
- // other Ractors may trigger global invalidation, so we need ctx.clear_local_types().
- // PC is used on errors like Encoding::CompatibilityError raised by rb_str_buf_append.
- jit_prepare_routine_call(jit, asm);
- asm.spill_temps(); // For ccall. Unconditionally spill them for RegTemps consistency.
+ // Guard buffers from GC since rb_str_buf_append may allocate.
+ // rb_str_buf_append may raise Encoding::CompatibilityError, but we accept compromised
+ // backtraces on this method since the interpreter does the same thing on opt_ltlt.
+ jit_prepare_non_leaf_call(jit, asm);
+
+ // Explicitly spill temps before making any C calls. `ccall` will spill temps, but it does a
+ // check to only spill if it thinks it's necessary. That logic can't see through the runtime
+ // branching occurring in the code generated for this function. Consequently, the branch for
+ // the first `ccall` will spill registers but the second one will not. At run time, we may
+    // jump over that spill code when executing the second branch, leading to situations that are
+    // quite hard to debug. If we spill up front, we avoid diverging behavior.
+ asm.spill_regs();
let concat_arg = asm.stack_pop(1);
let recv = asm.stack_pop(1);
@@ -5012,7 +6334,7 @@ fn jit_rb_str_concat(
// If encodings are different, use a slower encoding-aware concatenate
asm.write_label(enc_mismatch);
- asm.spill_temps(); // Ignore the register for the other local branch
+ asm.spill_regs(); // Ignore the register for the other local branch
let ret_opnd = asm.ccall(rb_str_buf_append as *const u8, vec![recv, concat_arg]);
let stack_ret = asm.stack_push(Type::TString);
asm.mov(stack_ret, ret_opnd);
@@ -5027,12 +6349,11 @@ fn jit_rb_str_concat(
fn jit_rb_ary_empty_p(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
let array_opnd = asm.stack_pop(1);
let array_reg = asm.load(array_opnd);
@@ -5051,12 +6372,11 @@ fn jit_rb_ary_empty_p(
fn jit_rb_ary_length(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
let array_opnd = asm.stack_pop(1);
let array_reg = asm.load(array_opnd);
@@ -5075,17 +6395,18 @@ fn jit_rb_ary_length(
fn jit_rb_ary_push(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
asm_comment!(asm, "Array#<<");
- // rb_ary_push allocates memory for buffer extension
- jit_prepare_routine_call(jit, asm);
+ // rb_ary_push allocates memory for buffer extension and can raise FrozenError
+ // Not using a lazy frame here since the interpreter also has a truncated
+ // stack trace from opt_ltlt.
+ jit_prepare_non_leaf_call(jit, asm);
let item_opnd = asm.stack_opnd(0);
let ary_opnd = asm.stack_opnd(1);
@@ -5097,26 +6418,44 @@ fn jit_rb_ary_push(
true
}
+// Just a leaf method, but not using `Primitive.attr! :leaf` since BOP methods can't use it.
+fn jit_rb_hash_empty_p(
+ _jit: &mut JITState,
+ asm: &mut Assembler,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ asm_comment!(asm, "Hash#empty?");
+
+ let hash_opnd = asm.stack_pop(1);
+ let ret = asm.ccall(rb_hash_empty_p as *const u8, vec![hash_opnd]);
+
+ let ret_opnd = asm.stack_push(Type::UnknownImm);
+ asm.mov(ret_opnd, ret);
+ true
+}
+
fn jit_obj_respond_to(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
argc: i32,
- known_recv_class: *const VALUE,
+ known_recv_class: Option<VALUE>,
) -> bool {
// respond_to(:sym) or respond_to(:sym, true)
if argc != 1 && argc != 2 {
return false;
}
- if known_recv_class.is_null() {
- return false;
- }
-
- let recv_class = unsafe { *known_recv_class };
+ let recv_class = match known_recv_class {
+ Some(class) => class,
+ None => return false,
+ };
// Get the method_id from compile time. We will later add a guard against it.
let mid_sym = jit.peek_at_stack(&asm.ctx, (argc - 1) as isize);
@@ -5155,7 +6494,7 @@ fn jit_obj_respond_to(
(METHOD_VISI_UNDEF, _) => {
// No method, we can return false given respond_to_missing? hasn't been overridden.
// In the future, we might want to jit the call to respond_to_missing?
- if !assume_method_basic_definition(jit, asm, ocb, recv_class, ID!(respond_to_missing)) {
+ if !assume_method_basic_definition(jit, asm, recv_class, ID!(respond_to_missing)) {
return false;
}
Qfalse
@@ -5177,7 +6516,7 @@ fn jit_obj_respond_to(
// Invalidate this block if method lookup changes for the method being queried. This works
// both for the case where a method does or does not exist, as for the latter we asked for a
// "negative CME" earlier.
- jit.assume_method_lookup_stable(asm, ocb, target_cme);
+ jit.assume_method_lookup_stable(asm, target_cme);
if argc == 2 {
// pop include_all argument (we only use its type info)
@@ -5194,7 +6533,6 @@ fn jit_obj_respond_to(
JCC_JNE,
jit,
asm,
- ocb,
SEND_MAX_DEPTH,
Counter::guard_send_respond_to_mid_mismatch,
);
@@ -5207,28 +6545,103 @@ fn jit_obj_respond_to(
fn jit_rb_f_block_given_p(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
+ asm.stack_pop(1);
+ let out_opnd = asm.stack_push(Type::UnknownImm);
+
+ gen_block_given(jit, asm, out_opnd, Qtrue.into(), Qfalse.into());
+
+ true
+}
+
+/// Codegen for `block_given?` and `defined?(yield)`
+fn gen_block_given(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ out_opnd: Opnd,
+ true_opnd: Opnd,
+ false_opnd: Opnd,
+) {
asm_comment!(asm, "block_given?");
- // Same as rb_vm_frame_block_handler
- let ep_opnd = gen_get_lep(jit, asm);
- let block_handler = asm.load(
- Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)
- );
+ // `yield` goes to the block handler stowed in the "local" iseq which is
+ // the current iseq or a parent. Only the "method" iseq type can be passed a
+ // block handler. (e.g. `yield` in the top level script is a syntax error.)
+ let local_iseq = unsafe { rb_get_iseq_body_local_iseq(jit.iseq) };
+ if unsafe { rb_get_iseq_body_type(local_iseq) } == ISEQ_TYPE_METHOD {
+ // Same as rb_vm_frame_block_handler
+ let ep_opnd = gen_get_lep(jit, asm);
+ let block_handler = asm.load(
+ Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)
+ );
+
+ // Return `block_handler != VM_BLOCK_HANDLER_NONE`
+ asm.cmp(block_handler, VM_BLOCK_HANDLER_NONE.into());
+ let block_given = asm.csel_ne(true_opnd, false_opnd);
+ asm.mov(out_opnd, block_given);
+ } else {
+ asm.mov(out_opnd, false_opnd);
+ }
+}
+
+// Codegen for rb_class_superclass()
+fn jit_rb_class_superclass(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ci: *const rb_callinfo,
+ cme: *const rb_callable_method_entry_t,
+ _block: Option<crate::codegen::BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ extern "C" {
+ fn rb_class_superclass(klass: VALUE) -> VALUE;
+ }
+
+ // It may raise "uninitialized class"
+ if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(0)) {
+ return false;
+ }
+
+ asm_comment!(asm, "Class#superclass");
+ let recv_opnd = asm.stack_opnd(0);
+ let ret = asm.ccall(rb_class_superclass as *const u8, vec![recv_opnd]);
asm.stack_pop(1);
- let out_opnd = asm.stack_push(Type::UnknownImm);
+ let ret_opnd = asm.stack_push(Type::Unknown);
+ asm.mov(ret_opnd, ret);
+
+ true
+}
+
+fn jit_rb_case_equal(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ known_recv_class: Option<VALUE>,
+) -> bool {
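+    // Specialize only while the receiver class's #== is still rb_obj_equal, so #=== can compile down to an identity comparison.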
+ if !jit.assume_expected_cfunc(asm, known_recv_class.unwrap(), ID!(eq), rb_obj_equal as _) {
+ return false;
+ }
- // Return `block_handler != VM_BLOCK_HANDLER_NONE`
- asm.cmp(block_handler, VM_BLOCK_HANDLER_NONE.into());
- let block_given = asm.csel_ne(Qtrue.into(), Qfalse.into());
- asm.mov(out_opnd, block_given);
+ asm_comment!(asm, "case_equal: {}#===", get_class_name(known_recv_class));
+
+ // Compare the arguments
+ let arg1 = asm.stack_pop(1);
+ let arg0 = asm.stack_pop(1);
+ asm.cmp(arg0, arg1);
+ let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into());
+
+ let stack_ret = asm.stack_push(Type::UnknownImm);
+ asm.mov(stack_ret, ret_opnd);
true
}
@@ -5236,18 +6649,17 @@ fn jit_rb_f_block_given_p(
fn jit_thread_s_current(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
_block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
asm_comment!(asm, "Thread.current");
asm.stack_pop(1);
// ec->thread_ptr
- let ec_thread_opnd = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_THREAD_PTR));
+ let ec_thread_opnd = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_THREAD_PTR as i32));
// thread->self
let thread_self = Opnd::mem(64, ec_thread_opnd, RUBY_OFFSET_THREAD_SELF);
@@ -5257,7 +6669,29 @@ fn jit_thread_s_current(
true
}
-// Check if we know how to codegen for a particular cfunc method
+/// Specialization for rb_obj_dup() (Kernel#dup)
+fn jit_rb_obj_dup(
+ _jit: &mut JITState,
+ asm: &mut Assembler,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ // Kernel#dup has arity=0, and caller already did argument count check.
+ let self_type = asm.ctx.get_opnd_type(StackOpnd(0));
+
+ if self_type.is_imm() {
+        // The method is a no-op when the receiver is an immediate value.
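+        // The receiver stays on the stack untouched, which is exactly the return value for this case.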
+ true
+ } else {
+ false
+ }
+}
+
+/// Check if we know how to codegen for a particular cfunc method
+/// See also: [reg_method_codegen].
fn lookup_cfunc_codegen(def: *const rb_method_definition_t) -> Option<MethodGenFn> {
let method_serial = unsafe { get_def_method_serial(def) };
let table = unsafe { METHOD_CODEGEN_TABLE.as_ref().unwrap() };
@@ -5436,14 +6870,6 @@ fn gen_push_frame(
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_SELF), frame.recv);
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_BLOCK_CODE), 0.into());
- if frame.iseq.is_some() {
- // Spill stack temps to let the callee use them (must be done before changing the SP register)
- asm.spill_temps();
-
- // Saving SP before calculating ep avoids a dependency on a register
- // However this must be done after referencing frame.recv, which may be SP-relative
- asm.mov(SP, sp);
- }
let ep = asm.sub(sp, SIZEOF_VALUE.into());
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_EP), ep);
}
@@ -5451,11 +6877,10 @@ fn gen_push_frame(
fn gen_send_cfunc(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
ci: *const rb_callinfo,
cme: *const rb_callable_method_entry_t,
block: Option<BlockHandler>,
- recv_known_klass: *const VALUE,
+ recv_known_class: Option<VALUE>,
flags: u32,
argc: i32,
) -> Option<CodegenStatus> {
@@ -5463,38 +6888,18 @@ fn gen_send_cfunc(
let cfunc_argc = unsafe { get_mct_argc(cfunc) };
let mut argc = argc;
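+    // cfunc_argc conventions: >= 0 means fixed arity, -1 means (argc, argv, recv), and -2 means (recv, args_array).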
- // If the function expects a Ruby array of arguments
- if cfunc_argc < 0 && cfunc_argc != -1 {
- gen_counter_incr(asm, Counter::send_cfunc_ruby_array_varg);
- return None;
- }
+ // Splat call to a C method that takes `VALUE *` and `len`
+ let variable_splat = flags & VM_CALL_ARGS_SPLAT != 0 && cfunc_argc == -1;
+ let block_arg = flags & VM_CALL_ARGS_BLOCKARG != 0;
- // We aren't handling a vararg cfuncs with splat currently.
- if flags & VM_CALL_ARGS_SPLAT != 0 && cfunc_argc == -1 {
- gen_counter_incr(asm, Counter::send_args_splat_cfunc_var_args);
+ // If it's a splat and the method expects a Ruby array of arguments
+ if cfunc_argc == -2 && flags & VM_CALL_ARGS_SPLAT != 0 {
+ gen_counter_incr(jit, asm, Counter::send_cfunc_splat_neg2);
return None;
}
- if flags & VM_CALL_ARGS_SPLAT != 0 && flags & VM_CALL_ZSUPER != 0 {
- // zsuper methods are super calls without any arguments.
- // They are also marked as splat, but don't actually have an array
- // they pull arguments from, instead we need to change to call
- // a different method with the current stack.
- gen_counter_incr(asm, Counter::send_args_splat_cfunc_zuper);
- return None;
- }
-
- // In order to handle backwards compatibility between ruby 3 and 2
- // ruby2_keywords was introduced. It is called only on methods
- // with splat and changes they way they handle them.
- // We are just going to not compile these.
- // https://docs.ruby-lang.org/en/3.2/Module.html#method-i-ruby2_keywords
- if unsafe {
- get_iseq_flags_ruby2_keywords(jit.iseq) && flags & VM_CALL_ARGS_SPLAT != 0
- } {
- gen_counter_incr(asm, Counter::send_args_splat_cfunc_ruby2_keywords);
- return None;
- }
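+    // Only an empty kw splat (`**nil`) is supported; exit_if_kwsplat_non_nil() bails out of compilation otherwise.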
+ exit_if_kwsplat_non_nil(jit, asm, flags, Counter::send_cfunc_kw_splat_non_nil)?;
+ let kw_splat = flags & VM_CALL_KW_SPLAT != 0;
let kw_arg = unsafe { vm_ci_kwarg(ci) };
let kw_arg_num = if kw_arg.is_null() {
@@ -5504,61 +6909,49 @@ fn gen_send_cfunc(
};
if kw_arg_num != 0 && flags & VM_CALL_ARGS_SPLAT != 0 {
- gen_counter_incr(asm, Counter::send_cfunc_splat_with_kw);
+ gen_counter_incr(jit, asm, Counter::send_cfunc_splat_with_kw);
return None;
}
if c_method_tracing_currently_enabled(jit) {
// Don't JIT if tracing c_call or c_return
- gen_counter_incr(asm, Counter::send_cfunc_tracing);
+ gen_counter_incr(jit, asm, Counter::send_cfunc_tracing);
return None;
}
// Increment total cfunc send count
- gen_counter_incr(asm, Counter::num_send_cfunc);
-
- // Delegate to codegen for C methods if we have it.
- if kw_arg.is_null() && flags & VM_CALL_OPT_SEND == 0 && flags & VM_CALL_ARGS_SPLAT == 0 && (cfunc_argc == -1 || argc == cfunc_argc) {
- let codegen_p = lookup_cfunc_codegen(unsafe { (*cme).def });
+ gen_counter_incr(jit, asm, Counter::num_send_cfunc);
+
+ // Delegate to codegen for C methods if we have it and the callsite is simple enough.
+ if kw_arg.is_null() &&
+ !kw_splat &&
+ flags & VM_CALL_OPT_SEND == 0 &&
+ flags & VM_CALL_ARGS_SPLAT == 0 &&
+ flags & VM_CALL_ARGS_BLOCKARG == 0 &&
+ (cfunc_argc == -1 || argc == cfunc_argc) {
let expected_stack_after = asm.ctx.get_stack_size() as i32 - argc;
- if let Some(known_cfunc_codegen) = codegen_p {
- if known_cfunc_codegen(jit, asm, ocb, ci, cme, block, argc, recv_known_klass) {
+ if let Some(known_cfunc_codegen) = lookup_cfunc_codegen(unsafe { (*cme).def }) {
+ // We don't push a frame for specialized cfunc codegen, so the generated code must be leaf.
+            // However, the interpreter doesn't push a frame on opt_* instructions either, so we allow
+ // non-sendish instructions to break this rule as an exception.
+ let cfunc_codegen = if jit.is_sendish() {
+ asm.with_leaf_ccall(|asm|
+ perf_call!("gen_send_cfunc: ", known_cfunc_codegen(jit, asm, ci, cme, block, argc, recv_known_class))
+ )
+ } else {
+ perf_call!("gen_send_cfunc: ", known_cfunc_codegen(jit, asm, ci, cme, block, argc, recv_known_class))
+ };
+
+ if cfunc_codegen {
assert_eq!(expected_stack_after, asm.ctx.get_stack_size() as i32);
- gen_counter_incr(asm, Counter::num_send_cfunc_inline);
+ gen_counter_incr(jit, asm, Counter::num_send_cfunc_inline);
// cfunc codegen generated code. Terminate the block so
// there isn't multiple calls in the same block.
- jump_to_next_insn(jit, asm, ocb);
- return Some(EndBlock);
+ return jump_to_next_insn(jit, asm);
}
}
}
- // Log the name of the method we're calling to,
- // note that we intentionally don't do this for inlined cfuncs
- if get_option!(gen_stats) {
- // TODO: extract code to get method name string into its own function
-
- // Assemble the method name string
- let mid = unsafe { vm_ci_mid(ci) };
- let class_name = if recv_known_klass != ptr::null() {
- unsafe { cstr_to_rust_string(rb_class2name(*recv_known_klass)) }.unwrap()
- } else {
- "Unknown".to_string()
- };
- let method_name = if mid != 0 {
- unsafe { cstr_to_rust_string(rb_id2name(mid)) }.unwrap()
- } else {
- "Unknown".to_string()
- };
- let name_str = format!("{}#{}", class_name, method_name);
-
- // Get an index for this cfunc name
- let cfunc_idx = get_cfunc_idx(&name_str);
-
- // Increment the counter for this cfunc
- asm.ccall(incr_cfunc_counter as *const u8, vec![cfunc_idx.into()]);
- }
-
// Check for interrupts
gen_check_ints(asm, Counter::guard_send_interrupted);
@@ -5566,10 +6959,30 @@ fn gen_send_cfunc(
// #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
// REG_CFP <= REG_SP + 4 * SIZEOF_VALUE + sizeof(rb_control_frame_t)
asm_comment!(asm, "stack overflow check");
- let stack_limit = asm.lea(asm.ctx.sp_opnd((SIZEOF_VALUE * 4 + 2 * RUBY_SIZEOF_CONTROL_FRAME) as isize));
+ const _: () = assert!(RUBY_SIZEOF_CONTROL_FRAME % SIZEOF_VALUE == 0, "sizeof(rb_control_frame_t) is a multiple of sizeof(VALUE)");
+ let stack_limit = asm.lea(asm.ctx.sp_opnd((4 + 2 * (RUBY_SIZEOF_CONTROL_FRAME / SIZEOF_VALUE)) as i32));
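+    // sp_opnd() takes a count of VALUE-sized slots, so the margin is 4 slots plus two control frames' worth of slots.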
asm.cmp(CFP, stack_limit);
asm.jbe(Target::side_exit(Counter::guard_send_se_cf_overflow));
+ // Guard for variable length splat call before any modifications to the stack
+ if variable_splat {
+ let splat_array_idx = i32::from(kw_splat) + i32::from(block_arg);
+ let comptime_splat_array = jit.peek_at_stack(&asm.ctx, splat_array_idx as isize);
+ if unsafe { rb_yjit_ruby2_keywords_splat_p(comptime_splat_array) } != 0 {
+ gen_counter_incr(jit, asm, Counter::send_cfunc_splat_varg_ruby2_keywords);
+ return None;
+ }
+
+ let splat_array = asm.stack_opnd(splat_array_idx);
+ guard_object_is_array(asm, splat_array, splat_array.into(), Counter::guard_send_splat_not_array);
+
+ asm_comment!(asm, "guard variable length splat call servicable");
+ let sp = asm.ctx.sp_opnd(0);
+ let proceed = asm.ccall(rb_yjit_splat_varg_checks as _, vec![sp, splat_array, CFP]);
+ asm.cmp(proceed, Qfalse.into());
+ asm.je(Target::side_exit(Counter::guard_send_cfunc_bad_splat_vargs));
+ }
+
// Number of args which will be passed through to the callee
// This is adjusted by the kwargs being combined into a hash.
let mut passed_argc = if kw_arg.is_null() {
@@ -5578,20 +6991,24 @@ fn gen_send_cfunc(
argc - kw_arg_num + 1
};
+ // Exclude the kw_splat hash from arity check
+ if kw_splat {
+ passed_argc -= 1;
+ }
+
// If the argument count doesn't match
if cfunc_argc >= 0 && cfunc_argc != passed_argc && flags & VM_CALL_ARGS_SPLAT == 0 {
- gen_counter_incr(asm, Counter::send_cfunc_argc_mismatch);
+ gen_counter_incr(jit, asm, Counter::send_cfunc_argc_mismatch);
return None;
}
// Don't JIT functions that need C stack arguments for now
if cfunc_argc >= 0 && passed_argc + 1 > (C_ARG_OPNDS.len() as i32) {
- gen_counter_incr(asm, Counter::send_cfunc_toomany_args);
+ gen_counter_incr(jit, asm, Counter::send_cfunc_toomany_args);
return None;
}
- let block_arg = flags & VM_CALL_ARGS_BLOCKARG != 0;
- let block_arg_type = if block_arg {
+ let mut block_arg_type = if block_arg {
Some(asm.ctx.get_opnd_type(StackOpnd(0)))
} else {
None
@@ -5599,41 +7016,41 @@ fn gen_send_cfunc(
match block_arg_type {
Some(Type::Nil | Type::BlockParamProxy) => {
- // We'll handle this later
- }
- None => {
- // Nothing to do
- }
- _ => {
- gen_counter_incr(asm, Counter::send_cfunc_block_arg);
- return None;
- }
- }
-
- match block_arg_type {
- Some(Type::Nil) => {
- // We have a nil block arg, so let's pop it off the args
+ // We don't need the actual stack value for these
asm.stack_pop(1);
}
- Some(Type::BlockParamProxy) => {
- // We don't need the actual stack value
+ Some(Type::Unknown | Type::UnknownImm) if jit.peek_at_stack(&asm.ctx, 0).nil_p() => {
+ // The sample blockarg is nil, so speculate that's the case.
+ asm.cmp(asm.stack_opnd(0), Qnil.into());
+ asm.jne(Target::side_exit(Counter::guard_send_cfunc_block_not_nil));
+ block_arg_type = Some(Type::Nil);
asm.stack_pop(1);
}
None => {
// Nothing to do
}
_ => {
- assert!(false);
+ gen_counter_incr(jit, asm, Counter::send_cfunc_block_arg);
+ return None;
}
}
+ let block_arg_type = block_arg_type; // drop `mut`
- // push_splat_args does stack manipulation so we can no longer side exit
- if flags & VM_CALL_ARGS_SPLAT != 0 {
- assert!(cfunc_argc >= 0);
+ // Pop the empty kw_splat hash
+ if kw_splat {
+ // Only `**nil` is supported right now. Checked in exit_if_kwsplat_non_nil()
+ assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0)));
+ asm.stack_pop(1);
+ argc -= 1;
+ }
+
+ // Splat handling when C method takes a static number of arguments.
+ // push_splat_args() does stack manipulation so we can no longer side exit
+ if flags & VM_CALL_ARGS_SPLAT != 0 && cfunc_argc >= 0 {
let required_args : u32 = (cfunc_argc as u32).saturating_sub(argc as u32 - 1);
// + 1 because we pass self
if required_args + 1 >= C_ARG_OPNDS.len() as u32 {
- gen_counter_incr(asm, Counter::send_cfunc_toomany_args);
+ gen_counter_incr(jit, asm, Counter::send_cfunc_toomany_args);
return None;
}
@@ -5652,15 +7069,33 @@ fn gen_send_cfunc(
handle_opt_send_shift_stack(asm, argc);
}
+ // Push a dynamic number of items from the splat array to the stack when calling a vargs method
+ let dynamic_splat_size = if variable_splat {
+ asm_comment!(asm, "variable length splat");
+ let stack_splat_array = asm.lea(asm.stack_opnd(0));
+ Some(asm.ccall(rb_yjit_splat_varg_cfunc as _, vec![stack_splat_array]))
+ } else {
+ None
+ };
+
// Points to the receiver operand on the stack
let recv = asm.stack_opnd(argc);
// Store incremented PC into current control frame in case callee raises.
jit_save_pc(jit, asm);
- // Increment the stack pointer by 3 (in the callee)
- // sp += 3
- let sp = asm.lea(asm.ctx.sp_opnd((SIZEOF_VALUE as isize) * 3));
+ // Find callee's SP with space for metadata.
+ // Usually sp+3.
+ let sp = if let Some(splat_size) = dynamic_splat_size {
+ // Compute the callee's SP at runtime in case we accept a variable size for the splat array
+ const _: () = assert!(SIZEOF_VALUE == 8, "opting for a shift since mul on A64 takes no immediates");
+ let splat_size_bytes = asm.lshift(splat_size, 3usize.into());
+ // 3 items for method metadata, minus one to remove the splat array
+ let static_stack_top = asm.lea(asm.ctx.sp_opnd(2));
+ asm.add(static_stack_top, splat_size_bytes)
+ } else {
+ asm.lea(asm.ctx.sp_opnd(3))
+ };
let specval = if block_arg_type == Some(Type::BlockParamProxy) {
SpecVal::BlockHandler(Some(BlockHandler::BlockParamProxy))
@@ -5673,23 +7108,23 @@ fn gen_send_cfunc(
frame_type |= VM_FRAME_FLAG_CFRAME_KW
}
- gen_push_frame(jit, asm, ControlFrame {
+ perf_call!("gen_send_cfunc: ", gen_push_frame(jit, asm, ControlFrame {
frame_type,
specval,
cme,
recv,
sp,
- pc: if cfg!(debug_assertions) {
+ pc: if cfg!(feature = "runtime_checks") {
Some(!0) // Poison value. Helps to fail fast.
} else {
None // Leave PC uninitialized as cfuncs shouldn't read it
},
iseq: None,
- });
+ }));
asm_comment!(asm, "set ec->cfp");
let new_cfp = asm.lea(Opnd::mem(64, CFP, -(RUBY_SIZEOF_CONTROL_FRAME as i32)));
- asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), new_cfp);
+ asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), new_cfp);
if !kw_arg.is_null() {
// Build a hash from all kwargs passed
@@ -5722,13 +7157,34 @@ fn gen_send_cfunc(
else if cfunc_argc == -1 {
// The method gets a pointer to the first argument
// rb_f_puts(int argc, VALUE *argv, VALUE recv)
+
+ let passed_argc_opnd = if let Some(splat_size) = dynamic_splat_size {
+ // The final argc is the size of the splat, minus one for the splat array itself
+ asm.add(splat_size, (passed_argc - 1).into())
+ } else {
+ // Without a splat, passed_argc is static
+ Opnd::Imm(passed_argc.into())
+ };
+
vec![
- Opnd::Imm(passed_argc.into()),
- asm.lea(asm.ctx.sp_opnd((-argc * SIZEOF_VALUE_I32) as isize)),
+ passed_argc_opnd,
+ asm.lea(asm.ctx.sp_opnd(-argc)),
asm.stack_opnd(argc),
]
}
- else {
+ // Variadic method taking a Ruby array
+ else if cfunc_argc == -2 {
+ // Slurp up all the arguments into an array
+ let stack_args = asm.lea(asm.ctx.sp_opnd(-argc));
+ let args_array = asm.ccall(
+ rb_ec_ary_new_from_values as _,
+ vec![EC, passed_argc.into(), stack_args]
+ );
+
+ // Example signature:
+ // VALUE neg2_method(VALUE self, VALUE argv)
+ vec![asm.stack_opnd(argc), args_array]
+ } else {
panic!("unexpected cfunc_args: {}", cfunc_argc)
};
@@ -5747,22 +7203,35 @@ fn gen_send_cfunc(
let stack_ret = asm.stack_push(Type::Unknown);
asm.mov(stack_ret, ret);
+ // Log the name of the method we're calling to. We intentionally don't do this for inlined cfuncs.
+ // We also do this after the C call to minimize the impact of spill_temps() on asm.ccall().
+ if get_option!(gen_stats) {
+ // Assemble the method name string
+ let mid = unsafe { rb_get_def_original_id((*cme).def) };
+ let name_str = get_method_name(Some(unsafe { (*cme).owner }), mid);
+
+ // Get an index for this cfunc name
+ let cfunc_idx = get_cfunc_idx(&name_str);
+
+ // Increment the counter for this cfunc
+ asm.ccall(incr_cfunc_counter as *const u8, vec![cfunc_idx.into()]);
+ }
+
// Pop the stack frame (ec->cfp++)
// Instead of recalculating, we can reuse the previous CFP, which is stored in a callee-saved
// register
- let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP);
+ let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32);
asm.store(ec_cfp_opnd, CFP);
// cfunc calls may corrupt types
- asm.ctx.clear_local_types();
+ asm.clear_local_types();
// Note: the return block of gen_send_iseq() has ctx->sp_offset == 1
// which allows for sharing the same successor.
// Jump (fall through) to the call continuation block
// We do this to end the current block after the call
- jump_to_next_insn(jit, asm, ocb);
- Some(EndBlock)
+ jump_to_next_insn(jit, asm)
}
// Generate RARRAY_LEN. For array_opnd, use Opnd::Reg to reduce memory access,
@@ -5817,17 +7286,29 @@ fn get_array_ptr(asm: &mut Assembler, array_reg: Opnd) -> Opnd {
asm.csel_nz(ary_opnd, heap_ptr_opnd)
}
+// Generate RSTRING_PTR
+fn get_string_ptr(asm: &mut Assembler, string_reg: Opnd) -> Opnd {
+ asm_comment!(asm, "get string pointer for embedded or heap");
+
+ let flags_opnd = Opnd::mem(VALUE_BITS, string_reg, RUBY_OFFSET_RBASIC_FLAGS);
+ asm.test(flags_opnd, (RSTRING_NOEMBED as u64).into());
+ let heap_ptr_opnd = asm.load(Opnd::mem(
+ usize::BITS as u8,
+ string_reg,
+ RUBY_OFFSET_RSTRING_AS_HEAP_PTR,
+ ));
+
+ // Load the address of the embedded array
+ // (struct RString *)(obj)->as.ary
+ let ary_opnd = asm.lea(Opnd::mem(VALUE_BITS, string_reg, RUBY_OFFSET_RSTRING_AS_ARY));
+ asm.csel_nz(heap_ptr_opnd, ary_opnd)
+}
+
/// Pushes arguments from an array to the stack. Differs from push splat because
/// the array can have items left over. Array is assumed to be T_ARRAY without guards.
fn copy_splat_args_for_rest_callee(array: Opnd, num_args: u32, asm: &mut Assembler) {
asm_comment!(asm, "copy_splat_args_for_rest_callee");
- let array_len_opnd = get_array_len(asm, array);
-
- asm_comment!(asm, "guard splat array large enough");
- asm.cmp(array_len_opnd, num_args.into());
- asm.jl(Target::side_exit(Counter::guard_send_iseq_has_rest_and_splat_too_few));
-
// Unused operands cause the backend to panic
if num_args == 0 {
return;
@@ -5835,24 +7316,8 @@ fn copy_splat_args_for_rest_callee(array: Opnd, num_args: u32, asm: &mut Assembl
asm_comment!(asm, "Push arguments from array");
- // Load the address of the embedded array
- // (struct RArray *)(obj)->as.ary
let array_reg = asm.load(array);
-
- // Conditionally load the address of the heap array
- // (struct RArray *)(obj)->as.heap.ptr
- let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS);
- asm.test(flags_opnd, Opnd::UImm(RARRAY_EMBED_FLAG as u64));
- let heap_ptr_opnd = Opnd::mem(
- usize::BITS as u8,
- array_reg,
- RUBY_OFFSET_RARRAY_AS_HEAP_PTR,
- );
- // Load the address of the embedded array
- // (struct RArray *)(obj)->as.ary
- let ary_opnd = asm.lea(Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RARRAY_AS_ARY));
- let ary_opnd = asm.csel_nz(ary_opnd, heap_ptr_opnd);
-
+ let ary_opnd = get_array_ptr(asm, array_reg);
for i in 0..num_args {
let top = asm.stack_push(Type::Unknown);
asm.mov(top, Opnd::mem(64, ary_opnd, i as i32 * SIZEOF_VALUE_I32));
@@ -5866,79 +7331,40 @@ fn push_splat_args(required_args: u32, asm: &mut Assembler) {
asm_comment!(asm, "push_splat_args");
let array_opnd = asm.stack_opnd(0);
- let array_reg = asm.load(array_opnd);
-
guard_object_is_array(
asm,
- array_reg,
+ array_opnd,
array_opnd.into(),
Counter::guard_send_splat_not_array,
);
- asm_comment!(asm, "Get array length for embedded or heap");
-
- // Pull out the embed flag to check if it's an embedded array.
- let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS);
-
- // Get the length of the array
- let emb_len_opnd = asm.and(flags_opnd, (RARRAY_EMBED_LEN_MASK as u64).into());
- let emb_len_opnd = asm.rshift(emb_len_opnd, (RARRAY_EMBED_LEN_SHIFT as u64).into());
-
- // Conditionally move the length of the heap array
- let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS);
- asm.test(flags_opnd, (RARRAY_EMBED_FLAG as u64).into());
-
- // Need to repeat this here to deal with register allocation
- let array_opnd = asm.stack_opnd(0);
- let array_reg = asm.load(array_opnd);
-
- let array_len_opnd = Opnd::mem(
- std::os::raw::c_long::BITS as u8,
- array_reg,
- RUBY_OFFSET_RARRAY_AS_HEAP_LEN,
- );
- let array_len_opnd = asm.csel_nz(emb_len_opnd, array_len_opnd);
+ let array_len_opnd = get_array_len(asm, array_opnd);
asm_comment!(asm, "Guard for expected splat length");
asm.cmp(array_len_opnd, required_args.into());
asm.jne(Target::side_exit(Counter::guard_send_splatarray_length_not_equal));
- asm_comment!(asm, "Check last argument is not ruby2keyword hash");
-
- // Need to repeat this here to deal with register allocation
- let array_reg = asm.load(asm.stack_opnd(0));
-
- let ary_opnd = get_array_ptr(asm, array_reg);
-
- let last_array_value = asm.load(Opnd::mem(64, ary_opnd, (required_args as i32 - 1) * (SIZEOF_VALUE as i32)));
+ // Check last element of array if present
+ if required_args > 0 {
+ asm_comment!(asm, "Check last argument is not ruby2keyword hash");
- guard_object_is_not_ruby2_keyword_hash(
- asm,
- last_array_value,
- Counter::guard_send_splatarray_last_ruby_2_keywords,
- );
+ // Need to repeat this here to deal with register allocation
+ let array_reg = asm.load(asm.stack_opnd(0));
+ let ary_opnd = get_array_ptr(asm, array_reg);
+ let last_array_value = asm.load(Opnd::mem(64, ary_opnd, (required_args as i32 - 1) * (SIZEOF_VALUE as i32)));
+ guard_object_is_not_ruby2_keyword_hash(
+ asm,
+ last_array_value,
+ Counter::guard_send_splatarray_last_ruby2_keywords,
+ );
+ }
asm_comment!(asm, "Push arguments from array");
let array_opnd = asm.stack_pop(1);
if required_args > 0 {
- // Load the address of the embedded array
- // (struct RArray *)(obj)->as.ary
let array_reg = asm.load(array_opnd);
-
- // Conditionally load the address of the heap array
- // (struct RArray *)(obj)->as.heap.ptr
- let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS);
- asm.test(flags_opnd, Opnd::UImm(RARRAY_EMBED_FLAG as u64));
- let heap_ptr_opnd = Opnd::mem(
- usize::BITS as u8,
- array_reg,
- RUBY_OFFSET_RARRAY_AS_HEAP_PTR,
- );
- // Load the address of the embedded array
- // (struct RArray *)(obj)->as.ary
- let ary_opnd = asm.lea(Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RARRAY_AS_ARY));
- let ary_opnd = asm.csel_nz(ary_opnd, heap_ptr_opnd);
+ let ary_opnd = get_array_ptr(asm, array_reg);
for i in 0..required_args {
let top = asm.stack_push(Type::Unknown);
@@ -5952,7 +7378,6 @@ fn push_splat_args(required_args: u32, asm: &mut Assembler) {
fn gen_send_bmethod(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
ci: *const rb_callinfo,
cme: *const rb_callable_method_entry_t,
block: Option<BlockHandler>,
@@ -5961,7 +7386,7 @@ fn gen_send_bmethod(
) -> Option<CodegenStatus> {
let procv = unsafe { rb_get_def_bmethod_proc((*cme).def) };
- let proc = unsafe { rb_yjit_get_proc_ptr(procv) };
+ let proc = unsafe { rb_jit_get_proc_ptr(procv) };
let proc_block = unsafe { &(*proc).block };
if proc_block.type_ != block_type_iseq {
@@ -5971,27 +7396,42 @@ fn gen_send_bmethod(
let capture = unsafe { proc_block.as_.captured.as_ref() };
let iseq = unsafe { *capture.code.iseq.as_ref() };
- // Optimize for single ractor mode and avoid runtime check for
- // "defined with an un-shareable Proc in a different Ractor"
- if !assume_single_ractor_mode(jit, asm, ocb) {
- gen_counter_incr(asm, Counter::send_bmethod_ractor);
- return None;
+ if !procv.shareable_p() {
+ let ractor_serial = unsafe { rb_yjit_cme_ractor_serial(cme) };
+ asm_comment!(asm, "guard current ractor == {}", ractor_serial);
+ let current_ractor_serial = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_RACTOR_ID as i32));
+ asm.cmp(current_ractor_serial, ractor_serial.into());
+ asm.jne(Target::side_exit(Counter::send_bmethod_ractor));
}
// Passing a block to a block needs logic different from passing
// a block to a method and sometimes requires allocation. Bail for now.
if block.is_some() {
- gen_counter_incr(asm, Counter::send_bmethod_block_arg);
+ gen_counter_incr(jit, asm, Counter::send_bmethod_block_arg);
return None;
}
let frame_type = VM_FRAME_MAGIC_BLOCK | VM_FRAME_FLAG_BMETHOD | VM_FRAME_FLAG_LAMBDA;
- gen_send_iseq(jit, asm, ocb, iseq, ci, frame_type, Some(capture.ep), cme, block, flags, argc, None)
+ perf_call! { gen_send_iseq(jit, asm, iseq, ci, frame_type, Some(capture.ep), cme, block, flags, argc, None) }
+}
+
+/// The kind of a value an ISEQ returns
+enum IseqReturn {
+ Value(VALUE),
+ LocalVariable(u32),
+ Receiver,
}
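+// Illustrative examples (not from this patch) of method bodies that reduce to a single
+// instruction plus `leave`, assuming the usual YARV compilation:
+//   def always_nil = nil   # putnil               -> IseqReturn::Value(Qnil)
+//   def one = 1            # putobject_INT2FIX_1_ -> IseqReturn::Value(fixnum 1)
+//   def identity(x) = x    # getlocal_WC_0        -> IseqReturn::LocalVariable(0)
+//   def me = self          # putself              -> IseqReturn::Receiver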
-/// Return the ISEQ's return value if it consists of only putnil/putobject and leave.
-fn iseq_get_return_value(iseq: IseqPtr) -> Option<VALUE> {
+extern "C" {
+ fn rb_simple_iseq_p(iseq: IseqPtr) -> bool;
+ fn rb_iseq_only_kwparam_p(iseq: IseqPtr) -> bool;
+}
+
+/// Return the ISEQ's return value if it consists of one simple instruction and leave.
+fn iseq_get_return_value(iseq: IseqPtr, captured_opnd: Option<Opnd>, block: Option<BlockHandler>, ci_flags: u32) -> Option<IseqReturn> {
// Expect only two instructions and one possible operand
+ // NOTE: If an ISEQ has an optional keyword parameter with a default value that requires
+ // computation, the ISEQ will always have more than two instructions and won't be inlined.
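+ // For example (illustrative, not from this patch), `def foo(k: rand) = k` compiles to a
+ // checkkeyword branch plus the default-value computation, so it fails the size check below.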
let iseq_size = unsafe { get_iseq_encoded_size(iseq) };
if !(2..=3).contains(&iseq_size) {
return None;
@@ -6006,10 +7446,52 @@ fn iseq_get_return_value(iseq: IseqPtr) -> Option<VALUE> {
return None;
}
match first_insn {
- YARVINSN_putnil => Some(Qnil),
- YARVINSN_putobject => unsafe { Some(*rb_iseq_pc_at_idx(iseq, 1)) },
- YARVINSN_putobject_INT2FIX_0_ => Some(VALUE::fixnum_from_usize(0)),
- YARVINSN_putobject_INT2FIX_1_ => Some(VALUE::fixnum_from_usize(1)),
+ YARVINSN_getlocal_WC_0 => {
+ // Accept only cases where only positional arguments are used by both the callee and the caller.
+ // Keyword arguments may be specified by the callee or the caller but not used.
+ // Reject block ISEQs to avoid autosplat and other block parameter complications.
+ if captured_opnd.is_some()
+ // Reject if block ISEQ is present
+ || block.is_some()
+ // Equivalent to `VM_CALL_ARGS_SIMPLE - VM_CALL_KWARG - has_block_iseq`
+ || ci_flags & (
+ VM_CALL_ARGS_SPLAT
+ | VM_CALL_KW_SPLAT
+ | VM_CALL_ARGS_BLOCKARG
+ | VM_CALL_FORWARDING
+ ) != 0
+ {
+ return None;
+ }
+
+ let ep_offset = unsafe { *rb_iseq_pc_at_idx(iseq, 1) }.as_u32();
+ let local_idx = ep_offset_to_local_idx(iseq, ep_offset);
+
+ // Only inline getlocal on a parameter. DCE in the ISEQ builder can
+ // make a two-instruction ISEQ that does not return a parameter.
+ if local_idx >= unsafe { get_iseq_body_param_size(iseq) } {
+ return None;
+ }
+
+ if unsafe { rb_simple_iseq_p(iseq) } {
+ return Some(IseqReturn::LocalVariable(local_idx));
+ } else if unsafe { rb_iseq_only_kwparam_p(iseq) } {
+ // Inline if only positional parameters are used
+ if let Ok(i) = i32::try_from(local_idx) {
+ if i < unsafe { rb_get_iseq_body_param_lead_num(iseq) } {
+ return Some(IseqReturn::LocalVariable(local_idx));
+ }
+ }
+ }
+
+ return None;
+ }
+ YARVINSN_putnil => Some(IseqReturn::Value(Qnil)),
+ YARVINSN_putobject => Some(IseqReturn::Value(unsafe { *rb_iseq_pc_at_idx(iseq, 1) })),
+ YARVINSN_putobject_INT2FIX_0_ => Some(IseqReturn::Value(VALUE::fixnum_from_usize(0))),
+ YARVINSN_putobject_INT2FIX_1_ => Some(IseqReturn::Value(VALUE::fixnum_from_usize(1))),
+ // We don't support invokeblock for now. Such ISEQs are likely not used by blocks anyway.
+ YARVINSN_putself if captured_opnd.is_none() => Some(IseqReturn::Receiver),
_ => None,
}
}
@@ -6017,7 +7499,6 @@ fn iseq_get_return_value(iseq: IseqPtr) -> Option<VALUE> {
fn gen_send_iseq(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
iseq: *const rb_iseq_t,
ci: *const rb_callinfo,
frame_type: u32,
@@ -6042,13 +7523,28 @@ fn gen_send_iseq(
// that the callee could use to know which keywords are unspecified
// (see the `checkkeyword` instruction and check `ruby --dump=insn -e 'def foo(k:itself)=k'`).
// We always need to set up this local if the call goes through.
- let doing_kw_call = unsafe { get_iseq_flags_has_kw(iseq) };
+ let has_kwrest = unsafe { get_iseq_flags_has_kwrest(iseq) };
+ let doing_kw_call = unsafe { get_iseq_flags_has_kw(iseq) } || has_kwrest;
let supplying_kws = unsafe { vm_ci_flag(ci) & VM_CALL_KWARG } != 0;
let iseq_has_rest = unsafe { get_iseq_flags_has_rest(iseq) };
let iseq_has_block_param = unsafe { get_iseq_flags_has_block(iseq) };
+ let arg_setup_block = captured_opnd.is_some(); // arg_setup_type: arg_setup_block (invokeblock)
+
+ // Is this iseq tagged as "forwardable"? Iseqs that take `...` as a
+ // parameter are tagged as forwardable (e.g. `def foo(...); end`)
+ let forwarding = unsafe { rb_get_iseq_flags_forwardable(iseq) };
+
+ // If a "forwardable" iseq has been called with a splat, then we _do not_
+ // want to expand the splat to the stack. So we'll only consider this
+ // a splat call if the callee iseq is not forwardable. For example,
+ // we do not want to handle the following code:
+ //
+ // `def foo(...); end; foo(*blah)`
+ let splat_call = (flags & VM_CALL_ARGS_SPLAT != 0) && !forwarding;
+ let kw_splat = (flags & VM_CALL_KW_SPLAT != 0) && !forwarding;
// For computing offsets to callee locals
- let num_params = unsafe { get_iseq_body_param_size(iseq) };
+ let num_params = unsafe { get_iseq_body_param_size(iseq) as i32 };
let num_locals = unsafe { get_iseq_body_local_table_size(iseq) as i32 };
let mut start_pc_offset: u16 = 0;
@@ -6063,35 +7559,49 @@ fn gen_send_iseq(
unsafe { get_cikw_keyword_len(kw_arg) }
};
- // Arity handling and optional parameter setup
- let mut opts_filled = argc - required_num - kw_arg_num;
+ // Arity handling and optional parameter setup for positional arguments.
+ // Splats are handled later.
+ let mut opts_filled = argc - required_num - kw_arg_num - i32::from(kw_splat) - i32::from(splat_call);
let opt_num = unsafe { get_iseq_body_param_opt_num(iseq) };
- // We have a rest parameter so there could be more args
- // than are required + optional. Those will go in rest.
+ // With a rest parameter or a yield to a block,
+ // callers can pass more than required + optional.
// So we cap opts_filled at opt_num.
- if iseq_has_rest {
+ if iseq_has_rest || arg_setup_block {
opts_filled = min(opts_filled, opt_num);
}
let mut opts_missing: i32 = opt_num - opts_filled;
let block_arg = flags & VM_CALL_ARGS_BLOCKARG != 0;
+ // Stack index of the splat array
+ let splat_pos = i32::from(block_arg) + i32::from(kw_splat) + kw_arg_num;
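+ // For instance (illustrative), for `foo(*ary, &blk)` the block arg sits above the splat
+ // on the stack, so splat_pos = 1 + 0 + 0 = 1.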
exit_if_stack_too_large(iseq)?;
- exit_if_tail_call(asm, ci)?;
- exit_if_has_post(asm, iseq)?;
- exit_if_has_kwrest(asm, iseq)?;
- exit_if_splat_and_ruby2_keywords(asm, jit, flags)?;
- exit_if_has_rest_and_captured(asm, iseq_has_rest, captured_opnd)?;
- exit_if_has_rest_and_supplying_kws(asm, iseq_has_rest, iseq, supplying_kws)?;
- exit_if_supplying_kw_and_has_no_kw(asm, supplying_kws, iseq)?;
- exit_if_supplying_kws_and_accept_no_kwargs(asm, supplying_kws, iseq)?;
- exit_if_splat_and_zsuper(asm, flags)?;
- exit_if_doing_kw_and_splat(asm, doing_kw_call, flags)?;
- exit_if_wrong_number_arguments(asm, opts_filled, flags, opt_num, iseq_has_rest)?;
- exit_if_doing_kw_and_opts_missing(asm, doing_kw_call, opts_missing)?;
- exit_if_has_rest_and_optional_and_block(asm, iseq_has_rest, opt_num, iseq, block_arg)?;
+ exit_if_tail_call(jit, asm, ci)?;
+ exit_if_has_post(jit, asm, iseq)?;
+ exit_if_kwsplat_non_nil(jit, asm, flags, Counter::send_iseq_kw_splat_non_nil)?;
+ exit_if_has_rest_and_captured(jit, asm, iseq_has_rest, captured_opnd)?;
+ exit_if_has_kwrest_and_captured(jit, asm, has_kwrest, captured_opnd)?;
+ exit_if_has_rest_and_supplying_kws(jit, asm, iseq_has_rest, supplying_kws)?;
+ exit_if_supplying_kw_and_has_no_kw(jit, asm, supplying_kws, doing_kw_call)?;
+ exit_if_supplying_kws_and_accept_no_kwargs(jit, asm, supplying_kws, iseq)?;
+ exit_if_doing_kw_and_splat(jit, asm, doing_kw_call, flags)?;
+ if !forwarding {
+ exit_if_wrong_number_arguments(jit, asm, arg_setup_block, opts_filled, flags, opt_num, iseq_has_rest)?;
+ }
+ exit_if_doing_kw_and_opts_missing(jit, asm, doing_kw_call, opts_missing)?;
+ exit_if_has_rest_and_optional_and_block(jit, asm, iseq_has_rest, opt_num, iseq, block_arg)?;
+ if forwarding && flags & VM_CALL_OPT_SEND != 0 {
+ gen_counter_incr(jit, asm, Counter::send_iseq_send_forwarding);
+ return None;
+ }
let block_arg_type = exit_if_unsupported_block_arg_type(jit, asm, block_arg)?;
+ // Bail if we can't drop extra arguments for a yield by just popping them
+ if supplying_kws && arg_setup_block && argc > (kw_arg_num + required_num + opt_num) {
+ gen_counter_incr(jit, asm, Counter::send_iseq_complex_discard_extras);
+ return None;
+ }
+
// Block parameter handling. This mirrors setup_parameters_complex().
if iseq_has_block_param {
if unsafe { get_iseq_body_local_iseq(iseq) == iseq } {
@@ -6100,126 +7610,34 @@ fn gen_send_iseq(
// In this case (param.flags.has_block && local_iseq != iseq),
// the block argument is setup as a local variable and requires
// materialization (allocation). Bail.
- gen_counter_incr(asm, Counter::send_iseq_materialized_block);
+ gen_counter_incr(jit, asm, Counter::send_iseq_materialized_block);
return None;
}
}
+ // Check that required keyword arguments are supplied and find any extras
+ // that should go into the keyword rest parameter (**kw_rest).
if doing_kw_call {
- // Here we're calling a method with keyword arguments and specifying
- // keyword arguments at this call site.
-
- // This struct represents the metadata about the callee-specified
- // keyword parameters.
- let keyword = unsafe { get_iseq_body_param_keyword(iseq) };
- let keyword_num: usize = unsafe { (*keyword).num }.try_into().unwrap();
- let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap();
-
- let mut required_kwargs_filled = 0;
-
- if keyword_num > 30 {
- // We have so many keywords that (1 << num) encoded as a FIXNUM
- // (which shifts it left one more) no longer fits inside a 32-bit
- // immediate.
- gen_counter_incr(asm, Counter::send_iseq_too_many_kwargs);
- return None;
- }
-
- // Check that the kwargs being passed are valid
- if supplying_kws {
- // This is the list of keyword arguments that the callee specified
- // in its initial declaration.
- // SAFETY: see compile.c for sizing of this slice.
- let callee_kwargs = unsafe { slice::from_raw_parts((*keyword).table, keyword_num) };
-
- // Here we're going to build up a list of the IDs that correspond to
- // the caller-specified keyword arguments. If they're not in the
- // same order as the order specified in the callee declaration, then
- // we're going to need to generate some code to swap values around
- // on the stack.
- let kw_arg_keyword_len: usize =
- unsafe { get_cikw_keyword_len(kw_arg) }.try_into().unwrap();
- let mut caller_kwargs: Vec<ID> = vec![0; kw_arg_keyword_len];
- for kwarg_idx in 0..kw_arg_keyword_len {
- let sym = unsafe { get_cikw_keywords_idx(kw_arg, kwarg_idx.try_into().unwrap()) };
- caller_kwargs[kwarg_idx] = unsafe { rb_sym2id(sym) };
- }
-
- // First, we're going to be sure that the names of every
- // caller-specified keyword argument correspond to a name in the
- // list of callee-specified keyword parameters.
- for caller_kwarg in caller_kwargs {
- let search_result = callee_kwargs
- .iter()
- .enumerate() // inject element index
- .find(|(_, &kwarg)| kwarg == caller_kwarg);
-
- match search_result {
- None => {
- // If the keyword was never found, then we know we have a
- // mismatch in the names of the keyword arguments, so we need to
- // bail.
- gen_counter_incr(asm, Counter::send_iseq_kwargs_mismatch);
- return None;
- }
- Some((callee_idx, _)) if callee_idx < keyword_required_num => {
- // Keep a count to ensure all required kwargs are specified
- required_kwargs_filled += 1;
- }
- _ => (),
- }
- }
- }
- assert!(required_kwargs_filled <= keyword_required_num);
- if required_kwargs_filled != keyword_required_num {
- gen_counter_incr(asm, Counter::send_iseq_kwargs_mismatch);
- return None;
- }
+ gen_iseq_kw_call_checks(jit, asm, iseq, kw_arg, has_kwrest, kw_arg_num)?;
}
- // Check if we need the arg0 splat handling of vm_callee_setup_block_arg()
- // Also known as "autosplat" inside setup_parameters_complex()
- let arg_setup_block = captured_opnd.is_some(); // arg_setup_type: arg_setup_block (invokeblock)
- let block_arg0_splat = arg_setup_block && argc == 1 && unsafe {
- (get_iseq_flags_has_lead(iseq) || opt_num > 1)
- && !get_iseq_flags_ambiguous_param0(iseq)
- };
- if block_arg0_splat {
- // If block_arg0_splat, we still need side exits after splat, but
- // doing push_splat_args here disallows it. So bail out.
- if flags & VM_CALL_ARGS_SPLAT != 0 && !iseq_has_rest {
- gen_counter_incr(asm, Counter::invokeblock_iseq_arg0_args_splat);
- return None;
- }
- // The block_arg0_splat implementation is for the rb_simple_iseq_p case,
- // but doing_kw_call means it's not a simple ISEQ.
- if doing_kw_call {
- gen_counter_incr(asm, Counter::invokeblock_iseq_arg0_has_kw);
- return None;
- }
- // The block_arg0_splat implementation cannot deal with optional parameters.
- // This is a setup_parameters_complex() situation and interacts with the
- // starting position of the callee.
- if opt_num > 1 {
- gen_counter_incr(asm, Counter::invokeblock_iseq_arg0_optional);
- return None;
- }
- }
-
- let splat_array_length = if flags & VM_CALL_ARGS_SPLAT != 0 {
- let array = jit.peek_at_stack(&asm.ctx, if block_arg { 1 } else { 0 }) ;
+ let splat_array_length = if splat_call {
+ let array = jit.peek_at_stack(&asm.ctx, splat_pos as isize);
let array_length = if array == Qnil {
0
+ } else if unsafe { !RB_TYPE_P(array, RUBY_T_ARRAY) } {
+ gen_counter_incr(jit, asm, Counter::send_iseq_splat_not_array);
+ return None;
} else {
- unsafe { rb_yjit_array_len(array) as u32}
+ unsafe { rb_jit_array_len(array) as u32}
};
// Arity check accounting for size of the splat. When callee has rest parameters, we insert
// runtime guards later in copy_splat_args_for_rest_callee()
if !iseq_has_rest {
- let supplying = argc - 1 + array_length as i32;
+ let supplying = argc - 1 - i32::from(kw_splat) + array_length as i32;
if (required_num..=required_num + opt_num).contains(&supplying) == false {
- gen_counter_incr(asm, Counter::send_iseq_splat_arity_error);
+ gen_counter_incr(jit, asm, Counter::send_iseq_splat_arity_error);
return None;
}
}
@@ -6233,7 +7651,7 @@ fn gen_send_iseq(
// On a normal splat without rest and optional args this is handled
// elsewhere depending on the case
asm_comment!(asm, "Side exit if length doesn't not equal compile time length");
- let array_len_opnd = get_array_len(asm, asm.stack_opnd(if block_arg { 1 } else { 0 }));
+ let array_len_opnd = get_array_len(asm, asm.stack_opnd(splat_pos));
asm.cmp(array_len_opnd, array_length.into());
asm.jne(Target::side_exit(Counter::guard_send_splatarray_length_not_equal));
}
@@ -6243,12 +7661,39 @@ fn gen_send_iseq(
None
};
+ // Check if we need the arg0 splat handling of vm_callee_setup_block_arg()
+ // Also known as "autosplat" inside setup_parameters_complex().
+ // Autosplat checks argc == 1 after splat and kwsplat processing, so make
+ // sure to amend this if we start supporting kw_splat.
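+ // A typical autosplat case (illustrative): `[[1, 2]].each { |a, b| }` yields one array
+ // argument, which gets splatted across the two block parameters.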
+ let block_arg0_splat = arg_setup_block
+ && (argc == 1 || (argc == 2 && splat_array_length == Some(0)))
+ && !supplying_kws && !doing_kw_call
+ && unsafe {
+ (get_iseq_flags_has_lead(iseq) || opt_num > 1)
+ && !get_iseq_flags_ambiguous_param0(iseq)
+ };
+ if block_arg0_splat {
+ // If block_arg0_splat, we still need side exits after splat, but
+ // the splat modifies the stack which breaks side exits. So bail out.
+ if splat_call {
+ gen_counter_incr(jit, asm, Counter::invokeblock_iseq_arg0_args_splat);
+ return None;
+ }
+ // The block_arg0_splat implementation cannot deal with optional parameters.
+ // This is a setup_parameters_complex() situation and interacts with the
+ // starting position of the callee.
+ if opt_num > 1 {
+ gen_counter_incr(jit, asm, Counter::invokeblock_iseq_arg0_optional);
+ return None;
+ }
+ }
+
// Adjust `opts_filled` and `opts_missing` taking
// into account the size of the splat expansion.
if let Some(len) = splat_array_length {
assert_eq!(kw_arg_num, 0); // Due to exit_if_doing_kw_and_splat().
// Simplifies calculation below.
- let num_args = (argc - 1) + len as i32;
+ let num_args = argc - 1 - i32::from(kw_splat) + len as i32;
opts_filled = if num_args >= required_num {
min(num_args - required_num, opt_num)
@@ -6271,12 +7716,16 @@ fn gen_send_iseq(
}
}
+ // Increment total ISEQ send count
+ gen_counter_incr(jit, asm, Counter::num_send_iseq);
+
// Shortcut for special `Primitive.attr! :leaf` builtins
- let builtin_attrs = unsafe { rb_yjit_iseq_builtin_attrs(iseq) };
+ let builtin_attrs = unsafe { rb_jit_iseq_builtin_attrs(iseq) };
let builtin_func_raw = unsafe { rb_yjit_builtin_function(iseq) };
let builtin_func = if builtin_func_raw.is_null() { None } else { Some(builtin_func_raw) };
let opt_send_call = flags & VM_CALL_OPT_SEND != 0; // .send call is not currently supported for builtins
- if let (None, Some(builtin_info), true, false) = (block, builtin_func, builtin_attrs & BUILTIN_ATTR_LEAF != 0, opt_send_call) {
+ if let (None, Some(builtin_info), true, false, None | Some(0)) =
+ (block, builtin_func, builtin_attrs & BUILTIN_ATTR_LEAF != 0, opt_send_call, splat_array_length) {
let builtin_argc = unsafe { (*builtin_info).argc };
if builtin_argc + 1 < (C_ARG_OPNDS.len() as i32) {
// We pop the block arg without using it because:
@@ -6285,23 +7734,30 @@ fn gen_send_iseq(
// adding one requires interpreter changes to support.
if block_arg_type.is_some() {
if iseq_has_block_param {
- gen_counter_incr(asm, Counter::send_iseq_leaf_builtin_block_arg_block_param);
+ gen_counter_incr(jit, asm, Counter::send_iseq_leaf_builtin_block_arg_block_param);
return None;
}
asm.stack_pop(1);
}
- asm_comment!(asm, "inlined leaf builtin");
- gen_counter_incr(asm, Counter::num_send_leaf_builtin);
-
- // Skip this if it doesn't trigger GC
- if builtin_attrs & BUILTIN_ATTR_NO_GC == 0 {
- // The callee may allocate, e.g. Integer#abs on a Bignum.
- // Save SP for GC, save PC for allocation tracing, and prepare
- // for global invalidation after GC's VM lock contention.
- jit_prepare_routine_call(jit, asm);
+ // Pop empty kw_splat hash which passes nothing (exit_if_kwsplat_non_nil())
+ if kw_splat {
+ asm.stack_pop(1);
}
+ // Pop empty splat array which passes nothing
+ if let Some(0) = splat_array_length {
+ asm.stack_pop(1);
+ }
+
+ asm_comment!(asm, "inlined leaf builtin");
+ gen_counter_incr(jit, asm, Counter::num_send_iseq_leaf);
+
+ // The callee may allocate, e.g. Integer#abs on a Bignum.
+ // Save SP for GC, save PC for allocation tracing, and prepare
+ // for global invalidation after GC's VM lock contention.
+ jit_prepare_call_with_gc(jit, asm);
+
// Call the builtin func (ec, recv, arg1, arg2, ...)
let mut args = vec![EC];
@@ -6321,82 +7777,145 @@ fn gen_send_iseq(
// Seems like a safe assumption.
// Let guard chains share the same successor
- jump_to_next_insn(jit, asm, ocb);
- return Some(EndBlock);
+ return jump_to_next_insn(jit, asm);
}
}
// Inline simple ISEQs whose return value is known at compile time
- if let (Some(value), None, false) = (iseq_get_return_value(iseq), block_arg_type, opt_send_call) {
+ if let (Some(value), None, false) = (iseq_get_return_value(iseq, captured_opnd, block, flags), block_arg_type, opt_send_call) {
asm_comment!(asm, "inlined simple ISEQ");
- gen_counter_incr(asm, Counter::num_send_inline);
+ gen_counter_incr(jit, asm, Counter::num_send_iseq_inline);
- // Pop receiver and arguments
- asm.stack_pop(argc as usize + if captured_opnd.is_some() { 0 } else { 1 });
+ match value {
+ IseqReturn::LocalVariable(local_idx) => {
+ // Put the local variable at the return slot
+ let stack_local = asm.stack_opnd(argc - 1 - local_idx as i32);
+ let stack_return = asm.stack_opnd(argc);
+ asm.mov(stack_return, stack_local);
- // Push the return value
- let stack_ret = asm.stack_push(Type::from(value));
- asm.mov(stack_ret, value.into());
+ // Update the mapping for the return value
+ let mapping = asm.ctx.get_opnd_mapping(stack_local.into());
+ asm.ctx.set_opnd_mapping(stack_return.into(), mapping);
+
+ // Pop everything but the return value
+ asm.stack_pop(argc as usize);
+ }
+ IseqReturn::Value(value) => {
+ // Pop receiver and arguments
+ asm.stack_pop(argc as usize + if captured_opnd.is_some() { 0 } else { 1 });
+
+ // Push the return value
+ let stack_ret = asm.stack_push(Type::from(value));
+ asm.mov(stack_ret, value.into());
+ },
+ IseqReturn::Receiver => {
+ // Just pop arguments and leave the receiver on stack
+ asm.stack_pop(argc as usize);
+ }
+ }
// Let guard chains share the same successor
- jump_to_next_insn(jit, asm, ocb);
- return Some(EndBlock);
+ return jump_to_next_insn(jit, asm);
}
// Stack overflow check
// Note that vm_push_frame checks it against a decremented cfp, hence the multiply by 2.
// #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
asm_comment!(asm, "stack overflow check");
+ const _: () = assert!(RUBY_SIZEOF_CONTROL_FRAME % SIZEOF_VALUE == 0, "sizeof(rb_control_frame_t) is a multiple of sizeof(VALUE)");
let stack_max: i32 = unsafe { get_iseq_body_stack_max(iseq) }.try_into().unwrap();
- let locals_offs =
- SIZEOF_VALUE_I32 * (num_locals + stack_max) + 2 * (RUBY_SIZEOF_CONTROL_FRAME as i32);
- let stack_limit = asm.lea(asm.ctx.sp_opnd(locals_offs as isize));
+ let locals_offs = (num_locals + stack_max) + 2 * (RUBY_SIZEOF_CONTROL_FRAME / SIZEOF_VALUE) as i32;
+ let stack_limit = asm.lea(asm.ctx.sp_opnd(locals_offs));
asm.cmp(CFP, stack_limit);
asm.jbe(Target::side_exit(Counter::guard_send_se_cf_overflow));
+ if iseq_has_rest && splat_call {
+ // Insert length guard for a call to copy_splat_args_for_rest_callee()
+ // that will come later. We will have made changes to
+ // the stack by spilling or handling __send__ shifting
+ // by the time we get to that code, so we need the
+ // guard here where we can still side exit.
+ let non_rest_arg_count = argc - i32::from(kw_splat) - 1;
+ if non_rest_arg_count < required_num + opt_num {
+ let take_count: u32 = (required_num - non_rest_arg_count + opts_filled)
+ .try_into().unwrap();
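+ // Worked example (illustrative): for `def f(a, b, *rest)` called as `f(1, *ary)`,
+ // non_rest_arg_count = 1 and required_num = 2, so with no optionals take_count = 1
+ // and we guard that the splat array supplies at least one element to fill `b`.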
+
+ if take_count > 0 {
+ asm_comment!(asm, "guard splat_array_length >= {take_count}");
+
+ let splat_array = asm.stack_opnd(splat_pos);
+ let array_len_opnd = get_array_len(asm, splat_array);
+ asm.cmp(array_len_opnd, take_count.into());
+ asm.jl(Target::side_exit(Counter::guard_send_iseq_has_rest_and_splat_too_few));
+ }
+ }
+
+ // All splats need to guard for ruby2_keywords hash. Check with a function call when
+ // splatting into a rest param since the index for the last item in the array is dynamic.
+ asm_comment!(asm, "guard no ruby2_keywords hash in splat");
+ let bad_splat = asm.ccall(rb_yjit_ruby2_keywords_splat_p as _, vec![asm.stack_opnd(splat_pos)]);
+ asm.cmp(bad_splat, 0.into());
+ asm.jnz(Target::side_exit(Counter::guard_send_splatarray_last_ruby2_keywords));
+ }
+
match block_arg_type {
- Some(Type::Nil) => {
+ Some(BlockArg::Nil) => {
// We have a nil block arg, so let's pop it off the args
asm.stack_pop(1);
}
- Some(Type::BlockParamProxy) => {
+ Some(BlockArg::BlockParamProxy) => {
// We don't need the actual stack value
asm.stack_pop(1);
}
- Some(Type::TProc) => {
+ Some(BlockArg::TProc) => {
// Place the proc as the block handler. We do this early because
// the block arg being at the top of the stack gets in the way of
// rest param handling later. Also, since there are C calls that
// come later, we can't hold this value in a register and place it
// near the end when we push a new control frame.
asm_comment!(asm, "guard block arg is a proc");
- // Simple predicate, no need for jit_prepare_routine_call().
+ // Simple predicate, no need for jit_prepare_non_leaf_call().
let is_proc = asm.ccall(rb_obj_is_proc as _, vec![asm.stack_opnd(0)]);
asm.cmp(is_proc, Qfalse.into());
jit_chain_guard(
JCC_JE,
jit,
asm,
- ocb,
SEND_MAX_DEPTH,
Counter::guard_send_block_arg_type,
);
- let callee_ep = -argc + num_locals + VM_ENV_DATA_SIZE as i32 - 1;
+ // If this is a forwardable iseq, adjust the stack size accordingly
+ let callee_ep = if forwarding {
+ -1 + num_locals + VM_ENV_DATA_SIZE as i32
+ } else {
+ -argc + num_locals + VM_ENV_DATA_SIZE as i32 - 1
+ };
let callee_specval = callee_ep + VM_ENV_DATA_INDEX_SPECVAL;
if callee_specval < 0 {
// Can't write to sp[-n] since that's where the arguments are
- gen_counter_incr(asm, Counter::send_iseq_clobbering_block_arg);
+ gen_counter_incr(jit, asm, Counter::send_iseq_clobbering_block_arg);
+ return None;
+ }
+ if iseq_has_rest || has_kwrest {
+ // The proc would be stored above the current stack top, where GC can't see it
+ gen_counter_incr(jit, asm, Counter::send_iseq_block_arg_gc_unsafe);
return None;
}
let proc = asm.stack_pop(1); // Pop first, as argc doesn't account for the block arg
- let callee_specval = asm.ctx.sp_opnd(callee_specval as isize * SIZEOF_VALUE as isize);
+ let callee_specval = asm.ctx.sp_opnd(callee_specval);
asm.store(callee_specval, proc);
}
None => {
// Nothing to do
}
- _ => unreachable!(),
+ }
+
+ if kw_splat {
+ // Only `**nil` is supported right now. Checked in exit_if_kwsplat_non_nil()
+ assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0)));
+ asm.stack_pop(1);
+ argc -= 1;
}
// push_splat_args does stack manipulation so we can no longer side exit
@@ -6406,7 +7925,7 @@ fn gen_send_iseq(
// an array that has the same length. We will insert guards.
argc = argc - 1 + array_length as i32;
if argc + asm.ctx.get_stack_size() as i32 > MAX_SPLAT_LENGTH {
- gen_counter_incr(asm, Counter::send_splat_too_long);
+ gen_counter_incr(jit, asm, Counter::send_splat_too_long);
return None;
}
push_splat_args(array_length, asm);
@@ -6425,7 +7944,7 @@ fn gen_send_iseq(
jit_save_pc(jit, asm);
gen_save_sp(asm);
- let rest_param_array = if flags & VM_CALL_ARGS_SPLAT != 0 {
+ let rest_param_array = if splat_call {
let non_rest_arg_count = argc - 1;
// We start by dupping the array because someone else might have
// a reference to it. This also normalizes to an ::Array instance.
@@ -6447,8 +7966,7 @@ fn gen_send_iseq(
// diff is >0 so no need to worry about null pointer
asm_comment!(asm, "load pointer to array elements");
- let offset_magnitude = SIZEOF_VALUE as u32 * diff;
- let values_opnd = asm.ctx.sp_opnd(-(offset_magnitude as isize));
+ let values_opnd = asm.ctx.sp_opnd(-(diff as i32));
let values_ptr = asm.lea(values_opnd);
asm_comment!(asm, "prepend stack values to rest array");
@@ -6464,14 +7982,14 @@ fn gen_send_iseq(
// from the array and move them to the stack.
asm_comment!(asm, "take items from splat array");
- let diff: u32 = (required_num - non_rest_arg_count + opts_filled)
+ let take_count: u32 = (required_num - non_rest_arg_count + opts_filled)
.try_into().unwrap();
// Copy required arguments to the stack without modifying the array
- copy_splat_args_for_rest_callee(array, diff, asm);
+ copy_splat_args_for_rest_callee(array, take_count, asm);
// We will now slice the array to give us a new array of the correct size
- let sliced = asm.ccall(rb_yjit_rb_ary_subseq_length as *const u8, vec![array, Opnd::UImm(diff as u64)]);
+ let sliced = asm.ccall(rb_yjit_rb_ary_subseq_length as *const u8, vec![array, Opnd::UImm(take_count.into())]);
sliced
} else {
@@ -6492,8 +8010,7 @@ fn gen_send_iseq(
Opnd::UImm(0)
} else {
asm_comment!(asm, "load pointer to array elements");
- let offset_magnitude = SIZEOF_VALUE as u32 * n;
- let values_opnd = asm.ctx.sp_opnd(-(offset_magnitude as isize));
+ let values_opnd = asm.ctx.sp_opnd(-(n as i32));
asm.lea(values_opnd)
};
@@ -6529,146 +8046,26 @@ fn gen_send_iseq(
};
// Store rest param to memory to avoid register shuffle as
// we won't be reading it for the remainder of the block.
- asm.ctx.dealloc_temp_reg(rest_param.stack_idx());
+ asm.ctx.dealloc_reg(rest_param.reg_opnd());
asm.store(rest_param, rest_param_array);
}
- if doing_kw_call {
- // Here we're calling a method with keyword arguments and specifying
- // keyword arguments at this call site.
-
- // Number of positional arguments the callee expects before the first
- // keyword argument
- let args_before_kw = required_num + opt_num;
-
- // This struct represents the metadata about the caller-specified
- // keyword arguments.
- let ci_kwarg = unsafe { vm_ci_kwarg(ci) };
- let caller_keyword_len: usize = if ci_kwarg.is_null() {
- 0
- } else {
- unsafe { get_cikw_keyword_len(ci_kwarg) }
- .try_into()
- .unwrap()
- };
-
- // This struct represents the metadata about the callee-specified
- // keyword parameters.
- let keyword = unsafe { get_iseq_body_param_keyword(iseq) };
-
- asm_comment!(asm, "keyword args");
+ // Pop surplus positional arguments when yielding
+ if arg_setup_block {
+ let extras = argc - required_num - opt_num - kw_arg_num;
+ if extras > 0 {
+ // Checked earlier. If there are keyword args, then
+ // the positional arguments are not at the stack top.
+ assert_eq!(0, kw_arg_num);
- // This is the list of keyword arguments that the callee specified
- // in its initial declaration.
- let callee_kwargs = unsafe { (*keyword).table };
- let total_kwargs: usize = unsafe { (*keyword).num }.try_into().unwrap();
-
- // Here we're going to build up a list of the IDs that correspond to
- // the caller-specified keyword arguments. If they're not in the
- // same order as the order specified in the callee declaration, then
- // we're going to need to generate some code to swap values around
- // on the stack.
- let mut caller_kwargs: Vec<ID> = vec![0; total_kwargs];
-
- for kwarg_idx in 0..caller_keyword_len {
- let sym = unsafe { get_cikw_keywords_idx(ci_kwarg, kwarg_idx.try_into().unwrap()) };
- caller_kwargs[kwarg_idx] = unsafe { rb_sym2id(sym) };
- }
- let mut kwarg_idx = caller_keyword_len;
-
- let mut unspecified_bits = 0;
-
- let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap();
- for callee_idx in keyword_required_num..total_kwargs {
- let mut already_passed = false;
- let callee_kwarg = unsafe { *(callee_kwargs.offset(callee_idx.try_into().unwrap())) };
-
- for caller_idx in 0..caller_keyword_len {
- if caller_kwargs[caller_idx] == callee_kwarg {
- already_passed = true;
- break;
- }
- }
-
- if !already_passed {
- // Reserve space on the stack for each default value we'll be
- // filling in (which is done in the next loop). Also increments
- // argc so that the callee's SP is recorded correctly.
- argc += 1;
- let default_arg = asm.stack_push(Type::Unknown);
-
- // callee_idx - keyword->required_num is used in a couple of places below.
- let req_num: isize = unsafe { (*keyword).required_num }.try_into().unwrap();
- let callee_idx_isize: isize = callee_idx.try_into().unwrap();
- let extra_args = callee_idx_isize - req_num;
-
- //VALUE default_value = keyword->default_values[callee_idx - keyword->required_num];
- let mut default_value = unsafe { *((*keyword).default_values.offset(extra_args)) };
-
- if default_value == Qundef {
- // Qundef means that this value is not constant and must be
- // recalculated at runtime, so we record it in unspecified_bits
- // (Qnil is then used as a placeholder instead of Qundef).
- unspecified_bits |= 0x01 << extra_args;
- default_value = Qnil;
- }
-
- asm.mov(default_arg, default_value.into());
-
- caller_kwargs[kwarg_idx] = callee_kwarg;
- kwarg_idx += 1;
- }
- }
-
- assert!(kwarg_idx == total_kwargs);
-
- // Next, we're going to loop through every keyword that was
- // specified by the caller and make sure that it's in the correct
- // place. If it's not we're going to swap it around with another one.
- for kwarg_idx in 0..total_kwargs {
- let kwarg_idx_isize: isize = kwarg_idx.try_into().unwrap();
- let callee_kwarg = unsafe { *(callee_kwargs.offset(kwarg_idx_isize)) };
-
- // If the argument is already in the right order, then we don't
- // need to generate any code since the expected value is already
- // in the right place on the stack.
- if callee_kwarg == caller_kwargs[kwarg_idx] {
- continue;
- }
-
- // In this case the argument is not in the right place, so we
- // need to find its position where it _should_ be and swap with
- // that location.
- for swap_idx in (kwarg_idx + 1)..total_kwargs {
- if callee_kwarg == caller_kwargs[swap_idx] {
- // First we're going to generate the code that is going
- // to perform the actual swapping at runtime.
- let swap_idx_i32: i32 = swap_idx.try_into().unwrap();
- let kwarg_idx_i32: i32 = kwarg_idx.try_into().unwrap();
- let offset0: u16 = (argc - 1 - swap_idx_i32 - args_before_kw)
- .try_into()
- .unwrap();
- let offset1: u16 = (argc - 1 - kwarg_idx_i32 - args_before_kw)
- .try_into()
- .unwrap();
- stack_swap(asm, offset0, offset1);
-
- // Next we're going to do some bookkeeping on our end so
- // that we know the order that the arguments are
- // actually in now.
- caller_kwargs.swap(kwarg_idx, swap_idx);
-
- break;
- }
- }
+ asm.stack_pop(extras as usize);
+ argc = required_num + opt_num + kw_arg_num;
}
+ }
- // Keyword arguments cause a special extra local variable to be
- // pushed onto the stack that represents the parameters that weren't
- // explicitly given a value and have a non-constant default.
- let unspec_opnd = VALUE::fixnum_from_usize(unspecified_bits).as_u64();
- asm.ctx.dealloc_temp_reg(asm.stack_opnd(-1).stack_idx()); // avoid using a register for unspecified_bits
- asm.mov(asm.stack_opnd(-1), unspec_opnd.into());
+ // Keyword argument passing
+ if doing_kw_call {
+ argc = gen_iseq_kw_call(jit, asm, kw_arg, iseq, argc, has_kwrest);
}
// Same as vm_callee_setup_block_arg_arg0_check and vm_callee_setup_block_arg_arg0_splat
@@ -6700,47 +8097,60 @@ fn gen_send_iseq(
argc = lead_num;
}
- fn nil_fill(comment: &'static str, fill_range: std::ops::Range<isize>, asm: &mut Assembler) {
+ fn nil_fill(comment: &'static str, fill_range: std::ops::Range<i32>, asm: &mut Assembler) {
if fill_range.is_empty() {
return;
}
asm_comment!(asm, "{}", comment);
for i in fill_range {
- let value_slot = asm.ctx.sp_opnd(i * SIZEOF_VALUE as isize);
+ let value_slot = asm.ctx.sp_opnd(i);
asm.store(value_slot, Qnil.into());
}
}
- // Nil-initialize missing optional parameters
- nil_fill(
- "nil-initialize missing optionals",
- {
- let begin = -(argc as isize) + required_num as isize + opts_filled as isize;
- let end = -(argc as isize) + required_num as isize + opt_num as isize;
+ if !forwarding {
+ // Nil-initialize missing optional parameters
+ nil_fill(
+ "nil-initialize missing optionals",
+ {
+ let begin = -argc + required_num + opts_filled;
+ let end = -argc + required_num + opt_num;
- begin..end
- },
- asm
- );
- // Nil-initialize the block parameter. It's the last parameter local
- if iseq_has_block_param {
- let block_param = asm.ctx.sp_opnd(
- SIZEOF_VALUE as isize * (-(argc as isize) + num_params as isize - 1)
+ begin..end
+ },
+ asm
+ );
+ // Nil-initialize the block parameter. It's the last parameter local
+ if iseq_has_block_param {
+ let block_param = asm.ctx.sp_opnd(-argc + num_params - 1);
+ asm.store(block_param, Qnil.into());
+ }
+ // Nil-initialize non-parameter locals
+ nil_fill(
+ "nil-initialize locals",
+ {
+ let begin = -argc + num_params;
+ let end = -argc + num_locals;
+
+ begin..end
+ },
+ asm
);
- asm.store(block_param, Qnil.into());
}
- // Nil-initialize non-parameter locals
- nil_fill(
- "nil-initialize locals",
- {
- let begin = -(argc as isize) + num_params as isize;
- let end = -(argc as isize) + num_locals as isize;
- begin..end
- },
- asm
- );
+ if forwarding {
+ assert_eq!(1, num_params);
+ // Write the CI onto the stack and ensure that it actually gets
+ // flushed to memory
+ asm_comment!(asm, "put call info for forwarding");
+ let ci_opnd = asm.stack_opnd(-1);
+ asm.ctx.dealloc_reg(ci_opnd.reg_opnd());
+ asm.mov(ci_opnd, VALUE(ci as usize).into());
+
+ // Nil-initialize other locals which are above the CI
+ nil_fill("nil-initialize locals", 1..num_locals, asm);
+ }
// Points to the receiver operand on the stack unless a captured environment is used
let recv = match captured_opnd {
@@ -6748,21 +8158,24 @@ fn gen_send_iseq(
_ => asm.stack_opnd(argc),
};
let captured_self = captured_opnd.is_some();
- let sp_offset = (argc as isize) + if captured_self { 0 } else { 1 };
+ let sp_offset = argc + if captured_self { 0 } else { 1 };
// Store the updated SP on the current frame (pop arguments and receiver)
asm_comment!(asm, "store caller sp");
- let caller_sp = asm.lea(asm.ctx.sp_opnd((SIZEOF_VALUE as isize) * -sp_offset));
+ let caller_sp = asm.lea(asm.ctx.sp_opnd(-sp_offset));
asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), caller_sp);
// Store the next PC in the current frame
jit_save_pc(jit, asm);
// Adjust the callee's stack pointer
- let offs = (SIZEOF_VALUE as isize) * (
- -(argc as isize) + num_locals as isize + VM_ENV_DATA_SIZE as isize
- );
- let callee_sp = asm.lea(asm.ctx.sp_opnd(offs));
+ let callee_sp = if forwarding {
+ let offs = num_locals + VM_ENV_DATA_SIZE as i32;
+ asm.lea(asm.ctx.sp_opnd(offs))
+ } else {
+ let offs = -argc + num_locals + VM_ENV_DATA_SIZE as i32;
+ asm.lea(asm.ctx.sp_opnd(offs))
+ };
let specval = if let Some(prev_ep) = prev_ep {
// We've already side-exited if the callee expects a block, so we
@@ -6771,16 +8184,16 @@ fn gen_send_iseq(
} else if let Some(captured_opnd) = captured_opnd {
let ep_opnd = asm.load(Opnd::mem(64, captured_opnd, SIZEOF_VALUE_I32)); // captured->ep
SpecVal::PrevEPOpnd(ep_opnd)
- } else if let Some(Type::TProc) = block_arg_type {
+ } else if let Some(BlockArg::TProc) = block_arg_type {
SpecVal::BlockHandler(Some(BlockHandler::AlreadySet))
- } else if let Some(Type::BlockParamProxy) = block_arg_type {
+ } else if let Some(BlockArg::BlockParamProxy) = block_arg_type {
SpecVal::BlockHandler(Some(BlockHandler::BlockParamProxy))
} else {
SpecVal::BlockHandler(block)
};
// Setup the new frame
- gen_push_frame(jit, asm, ControlFrame {
+ perf_call!("gen_send_iseq: ", gen_push_frame(jit, asm, ControlFrame {
frame_type,
specval,
cme,
@@ -6788,22 +8201,25 @@ fn gen_send_iseq(
sp: callee_sp,
iseq: Some(iseq),
pc: None, // We are calling into jitted code, which will set the PC as necessary
- });
+ }));
// No need to set cfp->pc since the callee sets it whenever calling into routines
// that could look at it through jit_save_pc().
// mov(cb, REG0, const_ptr_opnd(start_pc));
// mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), REG0);
- // Stub so we can return to JITted code
- let return_block = BlockId {
- iseq: jit.iseq,
- idx: jit.next_insn_idx(),
- };
+ // Create a blockid for the callee
+ let callee_blockid = BlockId { iseq, idx: start_pc_offset };
// Create a context for the callee
let mut callee_ctx = Context::default();
+ // If the callee has :inline_block annotation and the callsite has a block ISEQ,
+ // duplicate a callee block for each block ISEQ to make its `yield` monomorphic.
+ if let (Some(BlockHandler::BlockISeq(iseq)), true) = (block, builtin_attrs & BUILTIN_ATTR_INLINE_BLOCK != 0) {
+ callee_ctx.set_inline_block(iseq);
+ }
+
// Set the argument types in the callee's context
for arg_idx in 0..argc {
let stack_offs: u8 = (argc - arg_idx - 1).try_into().unwrap();
@@ -6811,6 +8227,13 @@ fn gen_send_iseq(
callee_ctx.set_local_type(arg_idx.try_into().unwrap(), arg_type);
}
+ // If we're in a forwarding callee, there will be one unknown type
+ // written into the local table (the caller's CI object)
+ if forwarding {
+ callee_ctx.set_local_type(0, Type::Unknown)
+ }
+
+ // Set the receiver type in the callee's context
let recv_type = if captured_self {
Type::Unknown // we don't track the type information of captured->self for now
} else {
@@ -6818,23 +8241,113 @@ fn gen_send_iseq(
};
callee_ctx.upgrade_opnd_type(SelfOpnd, recv_type);
+ // Spill or preserve argument registers
+ if forwarding {
+ // When forwarding, the callee's local table has only a callinfo,
+ // so we can't map the actual arguments to the callee's locals.
+ asm.spill_regs();
+ } else {
+ // Discover stack temp registers that can be used as the callee's locals
+ let mapped_temps = asm.map_temp_regs_to_args(&mut callee_ctx, argc);
+
+ // Spill stack temps and locals that are not used by the callee.
+ // This must be done before changing the SP register.
+ asm.spill_regs_except(&mapped_temps);
+
+ // If the callee block has been compiled before, spill/move registers to reuse the existing block
+ // to minimize the number of blocks we need to compile.
+ if let Some(existing_reg_mapping) = find_most_compatible_reg_mapping(callee_blockid, &callee_ctx) {
+ asm_comment!(asm, "reuse maps: {:?} -> {:?}", callee_ctx.get_reg_mapping(), existing_reg_mapping);
+
+ // Spill the registers that are not used in the existing block.
+ // When the same ISEQ is compiled as an entry block, it starts with no registers allocated.
+ for &reg_opnd in callee_ctx.get_reg_mapping().get_reg_opnds().iter() {
+ if existing_reg_mapping.get_reg(reg_opnd).is_none() {
+ match reg_opnd {
+ RegOpnd::Local(local_idx) => {
+ let spilled_temp = asm.stack_opnd(argc - local_idx as i32 - 1);
+ asm.spill_reg(spilled_temp);
+ callee_ctx.dealloc_reg(reg_opnd);
+ }
+ RegOpnd::Stack(_) => unreachable!("callee {:?} should have been spilled", reg_opnd),
+ }
+ }
+ }
+ assert!(callee_ctx.get_reg_mapping().get_reg_opnds().len() <= existing_reg_mapping.get_reg_opnds().len());
+
+ // Load the registers that are spilled in this block but used in the existing block.
+ // When there are multiple callsites, some registers spilled in this block may be used at other callsites.
+ for &reg_opnd in existing_reg_mapping.get_reg_opnds().iter() {
+ if callee_ctx.get_reg_mapping().get_reg(reg_opnd).is_none() {
+ match reg_opnd {
+ RegOpnd::Local(local_idx) => {
+ callee_ctx.alloc_reg(reg_opnd);
+ let loaded_reg = TEMP_REGS[callee_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()];
+ let loaded_temp = asm.stack_opnd(argc - local_idx as i32 - 1);
+ asm.load_into(Opnd::Reg(loaded_reg), loaded_temp);
+ }
+ RegOpnd::Stack(_) => unreachable!("find_most_compatible_reg_mapping should not leave {:?}", reg_opnd),
+ }
+ }
+ }
+ assert_eq!(callee_ctx.get_reg_mapping().get_reg_opnds().len(), existing_reg_mapping.get_reg_opnds().len());
+
+ // Shuffle registers to make the register mappings compatible
+ let mut moves = vec![];
+ for &reg_opnd in callee_ctx.get_reg_mapping().get_reg_opnds().iter() {
+ let old_reg = TEMP_REGS[callee_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()];
+ let new_reg = TEMP_REGS[existing_reg_mapping.get_reg(reg_opnd).unwrap()];
+ moves.push((new_reg, Opnd::Reg(old_reg)));
+ }
+ for (reg, opnd) in Assembler::reorder_reg_moves(&moves) {
+ asm.load_into(Opnd::Reg(reg), opnd);
+ }
+ callee_ctx.set_reg_mapping(existing_reg_mapping);
+ }
+ }
+
+ // Update SP register for the callee. This must be done after referencing frame.recv,
+ // which may be SP-relative.
+ asm.mov(SP, callee_sp);
+
+ // Log the name of the method we're calling into. We intentionally don't do this for inlined ISEQs.
+ // We also do this after spill_regs() to avoid doubly spilling the same thing on asm.ccall().
+ if get_option!(gen_stats) {
+ // Protect caller-saved registers in case they're used for arguments
+ let mapping = asm.cpush_all();
+
+ // Assemble the ISEQ name string
+ let name_str = get_iseq_name(iseq);
+
+ // Get an index for this ISEQ name
+ let iseq_idx = get_iseq_idx(&name_str);
+
+ // Increment the counter for this ISEQ
+ asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]);
+ asm.cpop_all(mapping);
+ }
+
// The callee might change locals through Kernel#binding and other means.
- asm.ctx.clear_local_types();
+ asm.clear_local_types();
// Pop arguments and receiver in return context and
// mark it as a continuation of gen_leave()
- let mut return_asm = Assembler::new();
+ let mut return_asm = Assembler::new(jit.num_locals());
return_asm.ctx = asm.ctx;
return_asm.stack_pop(sp_offset.try_into().unwrap());
return_asm.ctx.set_sp_offset(0); // We set SP on the caller's frame above
- return_asm.ctx.reset_chain_depth();
+ return_asm.ctx.reset_chain_depth_and_defer();
return_asm.ctx.set_as_return_landing();
+ // Stub so we can return to JITted code
+ let return_block = BlockId {
+ iseq: jit.iseq,
+ idx: jit.next_insn_idx(),
+ };
+
// Write the JIT return address on the callee frame
- gen_branch(
- jit,
+ jit.gen_branch(
asm,
- ocb,
return_block,
&return_asm.ctx,
None,
@@ -6846,96 +8359,414 @@ fn gen_send_iseq(
asm_comment!(asm, "switch to new CFP");
let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into());
asm.mov(CFP, new_cfp);
- asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP);
+ asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP);
// Directly jump to the entry point of the callee
gen_direct_jump(
jit,
&callee_ctx,
- BlockId {
- iseq: iseq,
- idx: start_pc_offset,
- },
+ callee_blockid,
asm,
);
Some(EndBlock)
}
+// Check if we can handle a keyword call
+fn gen_iseq_kw_call_checks(
+ jit: &JITState,
+ asm: &mut Assembler,
+ iseq: *const rb_iseq_t,
+ kw_arg: *const rb_callinfo_kwarg,
+ has_kwrest: bool,
+ caller_kw_num: i32
+) -> Option<()> {
+ // This struct represents the metadata about the callee-specified
+ // keyword parameters.
+ let keyword = unsafe { get_iseq_body_param_keyword(iseq) };
+ let keyword_num: usize = unsafe { (*keyword).num }.try_into().unwrap();
+ let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap();
+
+ let mut required_kwargs_filled = 0;
+
+ if keyword_num > 30 || caller_kw_num > 64 {
+ // We have so many keywords that (1 << num) encoded as a FIXNUM
+ // (which shifts it left one more) no longer fits inside a 32-bit
+ // immediate. Similarly, the keyword-rest bit mask is a u64, so more than 64 caller keywords can't be handled.
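+ // Illustrative arithmetic: with 31 keywords, unspecified_bits can reach 1 << 30, and
+ // tagging that as a Fixnum gives 2 * (1 << 30) + 1 = 2_147_483_649, just past i32::MAX.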
+ gen_counter_incr(jit, asm, Counter::send_iseq_too_many_kwargs);
+ return None;
+ }
+
+ // Check that the kwargs being passed are valid
+ if caller_kw_num > 0 {
+ // This is the list of keyword arguments that the callee specified
+ // in its initial declaration.
+ // SAFETY: see compile.c for sizing of this slice.
+ let callee_kwargs = if keyword_num == 0 {
+ &[]
+ } else {
+ unsafe { slice::from_raw_parts((*keyword).table, keyword_num) }
+ };
+
+ // Here we're going to build up a list of the IDs that correspond to
+ // the caller-specified keyword arguments. If they're not in the
+ // same order as the order specified in the callee declaration, then
+ // we're going to need to generate some code to swap values around
+ // on the stack.
+ let kw_arg_keyword_len = caller_kw_num as usize;
+ let mut caller_kwargs: Vec<ID> = vec![0; kw_arg_keyword_len];
+ for kwarg_idx in 0..kw_arg_keyword_len {
+ let sym = unsafe { get_cikw_keywords_idx(kw_arg, kwarg_idx.try_into().unwrap()) };
+ caller_kwargs[kwarg_idx] = unsafe { rb_sym2id(sym) };
+ }
+
+ // First, we're going to be sure that the names of every
+ // caller-specified keyword argument correspond to a name in the
+ // list of callee-specified keyword parameters.
+ for caller_kwarg in caller_kwargs {
+ let search_result = callee_kwargs
+ .iter()
+ .enumerate() // inject element index
+ .find(|(_, &kwarg)| kwarg == caller_kwarg);
+
+ match search_result {
+ None if !has_kwrest => {
+ // If the keyword was never found, then we know we have a
+ // mismatch in the names of the keyword arguments, so we need to
+ // bail.
+ gen_counter_incr(jit, asm, Counter::send_iseq_kwargs_mismatch);
+ return None;
+ }
+ Some((callee_idx, _)) if callee_idx < keyword_required_num => {
+ // Keep a count to ensure all required kwargs are specified
+ required_kwargs_filled += 1;
+ }
+ _ => (),
+ }
+ }
+ }
+ assert!(required_kwargs_filled <= keyword_required_num);
+ if required_kwargs_filled != keyword_required_num {
+ gen_counter_incr(jit, asm, Counter::send_iseq_kwargs_mismatch);
+ return None;
+ }
+
+ Some(())
+}
+
+// Codegen for keyword argument handling. Essentially private to gen_send_iseq() since
+// there are a lot of preconditions to check before reaching this code.
+fn gen_iseq_kw_call(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ci_kwarg: *const rb_callinfo_kwarg,
+ iseq: *const rb_iseq_t,
+ mut argc: i32,
+ has_kwrest: bool,
+) -> i32 {
+ let caller_keyword_len_i32: i32 = if ci_kwarg.is_null() {
+ 0
+ } else {
+ unsafe { get_cikw_keyword_len(ci_kwarg) }
+ };
+ let caller_keyword_len: usize = caller_keyword_len_i32.try_into().unwrap();
+ let anon_kwrest = unsafe { rb_get_iseq_flags_anon_kwrest(iseq) && !get_iseq_flags_has_kw(iseq) };
+
+ // This struct represents the metadata about the callee-specified
+ // keyword parameters.
+ let keyword = unsafe { get_iseq_body_param_keyword(iseq) };
+
+ asm_comment!(asm, "keyword args");
+
+ // This is the list of keyword arguments that the callee specified
+ // in its initial declaration.
+ let callee_kwargs = unsafe { (*keyword).table };
+ let callee_kw_count_i32: i32 = unsafe { (*keyword).num };
+ let callee_kw_count: usize = callee_kw_count_i32.try_into().unwrap();
+ let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap();
+
+ // Here we're going to build up a list of the IDs that correspond to
+ // the caller-specified keyword arguments. If they're not in the
+ // same order as the order specified in the callee declaration, then
+ // we're going to need to generate some code to swap values around
+ // on the stack.
+ let mut kwargs_order: Vec<ID> = vec![0; cmp::max(caller_keyword_len, callee_kw_count)];
+ for kwarg_idx in 0..caller_keyword_len {
+ let sym = unsafe { get_cikw_keywords_idx(ci_kwarg, kwarg_idx.try_into().unwrap()) };
+ kwargs_order[kwarg_idx] = unsafe { rb_sym2id(sym) };
+ }
+
+ let mut unspecified_bits = 0;
+
+ // The stack_opnd() index to the 0th keyword argument.
+ let kwargs_stack_base = caller_keyword_len_i32 - 1;
+
+ // Build the keyword rest parameter hash before we make any changes to the order of
+ // the supplied keyword arguments
+ let kwrest_type = if has_kwrest {
+ c_callable! {
+ fn build_kw_rest(rest_mask: u64, stack_kwargs: *const VALUE, keywords: *const rb_callinfo_kwarg) -> VALUE {
+ if keywords.is_null() {
+ return unsafe { rb_hash_new() };
+ }
+
+ // Use the total number of supplied keywords as a size upper bound
+ let keyword_len = unsafe { (*keywords).keyword_len } as usize;
+ let hash = unsafe { rb_hash_new_with_size(keyword_len as u64) };
+
+ // Put pairs into the kwrest hash as the mask describes
+ for kwarg_idx in 0..keyword_len {
+ if (rest_mask & (1 << kwarg_idx)) != 0 {
+ unsafe {
+ let keyword_symbol = (*keywords).keywords.as_ptr().add(kwarg_idx).read();
+ let keyword_value = stack_kwargs.add(kwarg_idx).read();
+ rb_hash_aset(hash, keyword_symbol, keyword_value);
+ }
+ }
+ }
+ return hash;
+ }
+ }
+
+ asm_comment!(asm, "build kwrest hash");
+
+ // Make a bit mask describing which keywords should go into kwrest.
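+ // Illustrative example (not from this patch): for `def f(a:, **rest)` called as
+ // `f(a: 1, b: 2, c: 3)`, only `a` matches a callee keyword, so bits 1 and 2 of the
+ // mask are set and `b`/`c` later go into the kwrest hash via build_kw_rest().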
+ let mut rest_mask: u64 = 0;
+ // Index for one argument that will go into kwrest.
+ let mut rest_collected_idx = None;
+ for (supplied_kw_idx, &supplied_kw) in kwargs_order.iter().take(caller_keyword_len).enumerate() {
+ let mut found = false;
+ for callee_idx in 0..callee_kw_count {
+ let callee_kw = unsafe { callee_kwargs.add(callee_idx).read() };
+ if callee_kw == supplied_kw {
+ found = true;
+ break;
+ }
+ }
+ if !found {
+ rest_mask |= 1 << supplied_kw_idx;
+ if rest_collected_idx.is_none() {
+ rest_collected_idx = Some(supplied_kw_idx as i32);
+ }
+ }
+ }
+
+ let (kwrest, kwrest_type) = if rest_mask == 0 && anon_kwrest {
+ // In case the kwrest hash should be empty and is anonymous in the callee,
+ // we can pass nil instead of allocating. Anonymous kwrest can only be
+ // delegated, and nil is the same as an empty hash when delegating.
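+ // e.g. (illustrative) for `def f(**) = g(**)` called with no keyword arguments,
+ // `**` would be empty, so nil is passed instead of allocating an empty hash.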
+ (Qnil.into(), Type::Nil)
+ } else {
+ // Save PC and SP before allocating
+ jit_save_pc(jit, asm);
+ gen_save_sp(asm);
+
+ // Build the kwrest hash. `struct rb_callinfo_kwarg` is malloc'd, so no GC concerns.
+ let kwargs_start = asm.lea(asm.ctx.sp_opnd(-caller_keyword_len_i32));
+ let hash = asm.ccall(
+ build_kw_rest as _,
+ vec![rest_mask.into(), kwargs_start, Opnd::const_ptr(ci_kwarg.cast())]
+ );
+ (hash, Type::THash)
+ };
+
+ // The kwrest parameter sits after `unspecified_bits` if the callee specifies any
+ // keywords.
+ let stack_kwrest_idx = kwargs_stack_base - callee_kw_count_i32 - i32::from(callee_kw_count > 0);
+ let stack_kwrest = asm.stack_opnd(stack_kwrest_idx);
+ // If `stack_kwrest` already has another argument there, we need to stow it elsewhere
+ // first before putting kwrest there. Use `rest_collected_idx` because that value went
+ // into kwrest so the slot is now free.
+ let kwrest_idx = callee_kw_count + usize::from(callee_kw_count > 0);
+ if let (Some(rest_collected_idx), true) = (rest_collected_idx, kwrest_idx < caller_keyword_len) {
+ let rest_collected = asm.stack_opnd(kwargs_stack_base - rest_collected_idx);
+ let mapping = asm.ctx.get_opnd_mapping(stack_kwrest.into());
+ asm.mov(rest_collected, stack_kwrest);
+ asm.ctx.set_opnd_mapping(rest_collected.into(), mapping);
+ // Update our bookkeeping to inform the reordering step later.
+ kwargs_order[rest_collected_idx as usize] = kwargs_order[kwrest_idx];
+ kwargs_order[kwrest_idx] = 0;
+ }
+ // Put kwrest straight into memory, since we might pop it later
+ asm.ctx.dealloc_reg(stack_kwrest.reg_opnd());
+ asm.mov(stack_kwrest, kwrest);
+ if stack_kwrest_idx >= 0 {
+ asm.ctx.set_opnd_mapping(stack_kwrest.into(), TempMapping::MapToStack(kwrest_type));
+ }
+
+ Some(kwrest_type)
+ } else {
+ None
+ };
+
+ // Ensure the stack is large enough for the callee
+ for _ in caller_keyword_len..callee_kw_count {
+ argc += 1;
+ asm.stack_push(Type::Unknown);
+ }
+ // Now this is the stack_opnd() index to the 0th keyword argument.
+ let kwargs_stack_base = kwargs_order.len() as i32 - 1;
+
+ // Next, we're going to loop through every keyword that was
+ // specified by the caller and make sure that it's in the correct
+ // place. If it's not we're going to swap it around with another one.
+ for kwarg_idx in 0..callee_kw_count {
+ let callee_kwarg = unsafe { callee_kwargs.add(kwarg_idx).read() };
+
+ // If the argument is already in the right order, then we don't
+ // need to generate any code since the expected value is already
+ // in the right place on the stack.
+ if callee_kwarg == kwargs_order[kwarg_idx] {
+ continue;
+ }
+
+ // In this case the argument is not in the right place, so we
+ // need to find its position where it _should_ be and swap with
+ // that location.
+ for swap_idx in 0..kwargs_order.len() {
+ if callee_kwarg == kwargs_order[swap_idx] {
+ // First we're going to generate the code that is going
+ // to perform the actual swapping at runtime.
+ let swap_idx_i32: i32 = swap_idx.try_into().unwrap();
+ let kwarg_idx_i32: i32 = kwarg_idx.try_into().unwrap();
+ let offset0 = kwargs_stack_base - swap_idx_i32;
+ let offset1 = kwargs_stack_base - kwarg_idx_i32;
+ stack_swap(asm, offset0, offset1);
+
+ // Next we're going to do some bookkeeping on our end so
+ // that we know the order that the arguments are
+ // actually in now.
+ kwargs_order.swap(kwarg_idx, swap_idx);
+
+ break;
+ }
+ }
+ }
+
+ // Now that every caller-specified kwarg is in the right place, filling
+ // in unspecified default parameters won't overwrite anything.
+ for kwarg_idx in keyword_required_num..callee_kw_count {
+ if kwargs_order[kwarg_idx] != unsafe { callee_kwargs.add(kwarg_idx).read() } {
+ let default_param_idx = kwarg_idx - keyword_required_num;
+ let mut default_value = unsafe { (*keyword).default_values.add(default_param_idx).read() };
+
+ if default_value == Qundef {
+ // Qundef means that this value is not constant and must be
+ // recalculated at runtime, so we record it in unspecified_bits
+ // (Qnil is then used as a placeholder instead of Qundef).
+ unspecified_bits |= 0x01 << default_param_idx;
+ default_value = Qnil;
+ }
+
+ let default_param = asm.stack_opnd(kwargs_stack_base - kwarg_idx as i32);
+ let param_type = Type::from(default_value);
+ asm.mov(default_param, default_value.into());
+ asm.ctx.set_opnd_mapping(default_param.into(), TempMapping::MapToStack(param_type));
+ }
+ }
+
+ // Pop extra arguments that went into kwrest now that they're at stack top
+ if has_kwrest && caller_keyword_len > callee_kw_count {
+ let extra_kwarg_count = caller_keyword_len - callee_kw_count;
+ asm.stack_pop(extra_kwarg_count);
+ argc = argc - extra_kwarg_count as i32;
+ }
+
+ // Keyword arguments cause a special extra local variable to be
+ // pushed onto the stack that represents the parameters that weren't
+ // explicitly given a value and have a non-constant default.
+ if callee_kw_count > 0 {
+ let unspec_opnd = VALUE::fixnum_from_usize(unspecified_bits).as_u64();
+ let top = asm.stack_push(Type::Fixnum);
+ asm.mov(top, unspec_opnd.into());
+ argc += 1;
+ }
+
+ // The kwrest parameter sits after `unspecified_bits`
+ if let Some(kwrest_type) = kwrest_type {
+ let kwrest = asm.stack_push(kwrest_type);
+ // We put the kwrest parameter in memory earlier
+ asm.ctx.dealloc_reg(kwrest.reg_opnd());
+ argc += 1;
+ }
+
+ argc
+}
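For readers following the hunk above: the reordering works by swapping each caller-supplied keyword into the slot the callee expects, and by recording in a fixnum bitmask which optional keywords still need their default computed at runtime. A minimal standalone sketch of that strategy follows (plain Rust with a hypothetical `reorder_kwargs` helper; the real code operates on stack operands and only sets a bit when the default is non-constant):

```rust
/// Hypothetical sketch of the swap-into-place strategy used above.
/// `callee_order` is the callee's declared keyword order, `caller_order` the
/// keywords as the caller pushed them, `required` the number of required keywords.
fn reorder_kwargs<'a>(
    callee_order: &[&'a str],
    caller_order: &mut Vec<&'a str>,
    required: usize,
) -> u64 {
    // Pad to the callee's length, mirroring the stack_push(Type::Unknown) loop above.
    if caller_order.len() < callee_order.len() {
        caller_order.resize(callee_order.len(), "");
    }

    // Swap each caller-supplied keyword into the position the callee expects.
    for idx in 0..callee_order.len() {
        if caller_order[idx] == callee_order[idx] {
            continue;
        }
        if let Some(swap_idx) = caller_order.iter().position(|kw| *kw == callee_order[idx]) {
            caller_order.swap(idx, swap_idx); // the JIT emits a stack_swap() for this
        }
    }

    // Keywords the caller did not supply get a bit set; in the real code a bit is
    // only set when the default value is non-constant (Qundef) and must be
    // recomputed at runtime.
    let mut unspecified_bits = 0u64;
    for (idx, kw) in callee_order.iter().enumerate().skip(required) {
        if caller_order[idx] != *kw {
            unspecified_bits |= 1 << (idx - required);
        }
    }
    unspecified_bits
}
```

For `def foo(a:, b: 1, c: 2)` called as `foo(c: 3, a: 4)`, the sketch swaps `c` into the third slot and returns `0b01`, marking only `b` as unspecified.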
+
/// This is a helper function to allow us to exit early
/// during code generation if a predicate is true.
/// We return Option<()> here because we will be able to
/// short-circuit using the ? operator if we return None.
/// It would be great if Rust let you implement ? for your
/// own types, but as of right now it doesn't.
-fn exit_if(asm: &mut Assembler, pred: bool, counter: Counter) -> Option<()> {
+fn exit_if(jit: &JITState, asm: &mut Assembler, pred: bool, counter: Counter) -> Option<()> {
if pred {
- gen_counter_incr(asm, counter);
+ gen_counter_incr(jit, asm, counter);
return None
}
Some(())
}
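A tiny standalone illustration of that short-circuit pattern (hypothetical stand-ins, not the YJIT definitions): returning `None` from the helper lets the caller bail out of codegen with a single `?`.

```rust
#[derive(Debug)]
enum DemoCounter { SendIseqTailcall } // hypothetical stand-in for Counter

fn exit_if_demo(pred: bool, counter: DemoCounter) -> Option<()> {
    if pred {
        println!("bump {:?} and bail", counter); // stands in for gen_counter_incr
        return None;
    }
    Some(())
}

fn gen_demo(is_tailcall: bool) -> Option<&'static str> {
    // `?` propagates the None, ending codegen for this call site early.
    exit_if_demo(is_tailcall, DemoCounter::SendIseqTailcall)?;
    Some("generated code")
}
```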
#[must_use]
-fn exit_if_tail_call(asm: &mut Assembler, ci: *const rb_callinfo) -> Option<()> {
- exit_if(asm, unsafe { vm_ci_flag(ci) } & VM_CALL_TAILCALL != 0, Counter::send_iseq_tailcall)
+fn exit_if_tail_call(jit: &JITState, asm: &mut Assembler, ci: *const rb_callinfo) -> Option<()> {
+ exit_if(jit, asm, unsafe { vm_ci_flag(ci) } & VM_CALL_TAILCALL != 0, Counter::send_iseq_tailcall)
}
#[must_use]
-fn exit_if_has_post(asm: &mut Assembler, iseq: *const rb_iseq_t) -> Option<()> {
- exit_if(asm, unsafe { get_iseq_flags_has_post(iseq) }, Counter::send_iseq_has_post)
+fn exit_if_has_post(jit: &JITState, asm: &mut Assembler, iseq: *const rb_iseq_t) -> Option<()> {
+ exit_if(jit, asm, unsafe { get_iseq_flags_has_post(iseq) }, Counter::send_iseq_has_post)
}
#[must_use]
-fn exit_if_has_kwrest(asm: &mut Assembler, iseq: *const rb_iseq_t) -> Option<()> {
- exit_if(asm, unsafe { get_iseq_flags_has_kwrest(iseq) }, Counter::send_iseq_has_kwrest)
+fn exit_if_kwsplat_non_nil(jit: &JITState, asm: &mut Assembler, flags: u32, counter: Counter) -> Option<()> {
+ let kw_splat = flags & VM_CALL_KW_SPLAT != 0;
+ let kw_splat_stack = StackOpnd((flags & VM_CALL_ARGS_BLOCKARG != 0).into());
+ exit_if(jit, asm, kw_splat && asm.ctx.get_opnd_type(kw_splat_stack) != Type::Nil, counter)
}
#[must_use]
-fn exit_if_splat_and_ruby2_keywords(asm: &mut Assembler, jit: &mut JITState, flags: u32) -> Option<()> {
- // In order to handle backwards compatibility between ruby 3 and 2
- // ruby2_keywords was introduced. It is called only on methods
- // with splat and changes they way they handle them.
- // We are just going to not compile these.
- // https://www.rubydoc.info/stdlib/core/Proc:ruby2_keywords
- exit_if(
- asm,
- unsafe { get_iseq_flags_ruby2_keywords(jit.iseq) } && flags & VM_CALL_ARGS_SPLAT != 0,
- Counter::send_iseq_ruby2_keywords,
- )
+fn exit_if_has_rest_and_captured(jit: &JITState, asm: &mut Assembler, iseq_has_rest: bool, captured_opnd: Option<Opnd>) -> Option<()> {
+ exit_if(jit, asm, iseq_has_rest && captured_opnd.is_some(), Counter::send_iseq_has_rest_and_captured)
}
#[must_use]
-fn exit_if_has_rest_and_captured(asm: &mut Assembler, iseq_has_rest: bool, captured_opnd: Option<Opnd>) -> Option<()> {
- exit_if(asm, iseq_has_rest && captured_opnd.is_some(), Counter::send_iseq_has_rest_and_captured)
+fn exit_if_has_kwrest_and_captured(jit: &JITState, asm: &mut Assembler, iseq_has_kwrest: bool, captured_opnd: Option<Opnd>) -> Option<()> {
+    // We need to call a C function to allocate the kwrest hash, but also need to hold the captured
+ // block across the call, which we can't do.
+ exit_if(jit, asm, iseq_has_kwrest && captured_opnd.is_some(), Counter::send_iseq_has_kwrest_and_captured)
}
#[must_use]
-fn exit_if_has_rest_and_supplying_kws(asm: &mut Assembler, iseq_has_rest: bool, iseq: *const rb_iseq_t, supplying_kws: bool) -> Option<()> {
+fn exit_if_has_rest_and_supplying_kws(jit: &JITState, asm: &mut Assembler, iseq_has_rest: bool, supplying_kws: bool) -> Option<()> {
+    // There can be a gap between the rest parameter array and the supplied keywords, or
+    // no space to put the rest array (e.g. in `def foo(*arr, k:) = arr; foo(k: 1)`, the 1 is
+    // sitting where the rest array should be).
exit_if(
+ jit,
asm,
- iseq_has_rest && unsafe { get_iseq_flags_has_kw(iseq) } && supplying_kws,
+ iseq_has_rest && supplying_kws,
Counter::send_iseq_has_rest_and_kw_supplied,
)
}
#[must_use]
-fn exit_if_supplying_kw_and_has_no_kw(asm: &mut Assembler, supplying_kws: bool, iseq: *const rb_iseq_t) -> Option<()> {
- // If we have keyword arguments being passed to a callee that only takes
- // positionals, then we need to allocate a hash. For now we're going to
- // call that too complex and bail.
+fn exit_if_supplying_kw_and_has_no_kw(jit: &JITState, asm: &mut Assembler, supplying_kws: bool, callee_kws: bool) -> Option<()> {
+    // Passing keyword arguments to a callee that has no keyword parameters means
+    // allocating a hash and treating it as a positional argument. Bail for now.
exit_if(
+ jit,
asm,
- supplying_kws && !unsafe { get_iseq_flags_has_kw(iseq) },
+ supplying_kws && !callee_kws,
Counter::send_iseq_has_no_kw,
)
}
#[must_use]
-fn exit_if_supplying_kws_and_accept_no_kwargs(asm: &mut Assembler, supplying_kws: bool, iseq: *const rb_iseq_t) -> Option<()> {
+fn exit_if_supplying_kws_and_accept_no_kwargs(jit: &JITState, asm: &mut Assembler, supplying_kws: bool, iseq: *const rb_iseq_t) -> Option<()> {
// If we have a method accepting no kwargs (**nil), exit if we have passed
// it any kwargs.
exit_if(
+ jit,
asm,
supplying_kws && unsafe { get_iseq_flags_accepts_no_kwarg(iseq) },
Counter::send_iseq_accepts_no_kwarg
@@ -6943,52 +8774,62 @@ fn exit_if_supplying_kws_and_accept_no_kwargs(asm: &mut Assembler, supplying_kws
}
#[must_use]
-fn exit_if_splat_and_zsuper(asm: &mut Assembler, flags: u32) -> Option<()> {
- // zsuper methods are super calls without any arguments.
- // They are also marked as splat, but don't actually have an array
- // they pull arguments from, instead we need to change to call
- // a different method with the current stack.
- exit_if(asm, flags & VM_CALL_ARGS_SPLAT != 0 && flags & VM_CALL_ZSUPER != 0, Counter::send_iseq_zsuper)
-}
-
-#[must_use]
-fn exit_if_doing_kw_and_splat(asm: &mut Assembler, doing_kw_call: bool, flags: u32) -> Option<()> {
- exit_if(asm, doing_kw_call && flags & VM_CALL_ARGS_SPLAT != 0, Counter::send_iseq_splat_with_kw)
+fn exit_if_doing_kw_and_splat(jit: &JITState, asm: &mut Assembler, doing_kw_call: bool, flags: u32) -> Option<()> {
+ exit_if(jit, asm, doing_kw_call && flags & VM_CALL_ARGS_SPLAT != 0, Counter::send_iseq_splat_with_kw)
}
#[must_use]
-fn exit_if_wrong_number_arguments(asm: &mut Assembler, opts_filled: i32, flags: u32, opt_num: i32, iseq_has_rest: bool) -> Option<()> {
+fn exit_if_wrong_number_arguments(
+ jit: &JITState,
+ asm: &mut Assembler,
+ args_setup_block: bool,
+ opts_filled: i32,
+ flags: u32,
+ opt_num: i32,
+ iseq_has_rest: bool,
+) -> Option<()> {
// Too few arguments and no splat to make up for it
let too_few = opts_filled < 0 && flags & VM_CALL_ARGS_SPLAT == 0;
- // Too many arguments and no place to put them (i.e. rest arg)
- let too_many = opts_filled > opt_num && !iseq_has_rest;
+    // Too many arguments and no sink to take them
+ let too_many = opts_filled > opt_num && !(iseq_has_rest || args_setup_block);
- exit_if(asm, too_few || too_many, Counter::send_iseq_arity_error)
+ exit_if(jit, asm, too_few || too_many, Counter::send_iseq_arity_error)
}
#[must_use]
-fn exit_if_doing_kw_and_opts_missing(asm: &mut Assembler, doing_kw_call: bool, opts_missing: i32) -> Option<()> {
+fn exit_if_doing_kw_and_opts_missing(jit: &JITState, asm: &mut Assembler, doing_kw_call: bool, opts_missing: i32) -> Option<()> {
// If we have unfilled optional arguments and keyword arguments then we
// would need to adjust the arguments location to account for that.
// For now we aren't handling this case.
- exit_if(asm, doing_kw_call && opts_missing > 0, Counter::send_iseq_missing_optional_kw)
+ exit_if(jit, asm, doing_kw_call && opts_missing > 0, Counter::send_iseq_missing_optional_kw)
}
#[must_use]
-fn exit_if_has_rest_and_optional_and_block(asm: &mut Assembler, iseq_has_rest: bool, opt_num: i32, iseq: *const rb_iseq_t, block_arg: bool) -> Option<()> {
+fn exit_if_has_rest_and_optional_and_block(jit: &JITState, asm: &mut Assembler, iseq_has_rest: bool, opt_num: i32, iseq: *const rb_iseq_t, block_arg: bool) -> Option<()> {
exit_if(
+ jit,
asm,
iseq_has_rest && opt_num != 0 && (unsafe { get_iseq_flags_has_block(iseq) } || block_arg),
Counter::send_iseq_has_rest_opt_and_block
)
}
+#[derive(Clone, Copy)]
+enum BlockArg {
+ Nil,
+ /// A special sentinel value indicating the block parameter should be read from
+ /// the current surrounding cfp
+ BlockParamProxy,
+ /// A proc object. Could be an instance of a subclass of ::rb_cProc
+ TProc,
+}
+
#[must_use]
fn exit_if_unsupported_block_arg_type(
jit: &mut JITState,
asm: &mut Assembler,
supplying_block_arg: bool
-) -> Option<Option<Type>> {
+) -> Option<Option<BlockArg>> {
let block_arg_type = if supplying_block_arg {
asm.ctx.get_opnd_type(StackOpnd(0))
} else {
@@ -6997,19 +8838,18 @@ fn exit_if_unsupported_block_arg_type(
};
match block_arg_type {
- Type::Nil | Type::BlockParamProxy => {
- // We'll handle this later
- Some(Some(block_arg_type))
- }
+ // We'll handle Nil and BlockParamProxy later
+ Type::Nil => Some(Some(BlockArg::Nil)),
+ Type::BlockParamProxy => Some(Some(BlockArg::BlockParamProxy)),
_ if {
let sample_block_arg = jit.peek_at_stack(&asm.ctx, 0);
unsafe { rb_obj_is_proc(sample_block_arg) }.test()
} => {
// Speculate that we'll have a proc as the block arg
- Some(Some(Type::TProc))
+ Some(Some(BlockArg::TProc))
}
_ => {
- gen_counter_incr(asm, Counter::send_iseq_block_arg_type);
+ gen_counter_incr(jit, asm, Counter::send_iseq_block_arg_type);
None
}
}
@@ -7030,7 +8870,6 @@ fn exit_if_stack_too_large(iseq: *const rb_iseq_t) -> Option<()> {
fn gen_struct_aref(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
ci: *const rb_callinfo,
cme: *const rb_callable_method_entry_t,
comptime_recv: VALUE,
@@ -7059,6 +8898,13 @@ fn gen_struct_aref(
}
}
+ if c_method_tracing_currently_enabled(jit) {
+ // Struct accesses need fire c_call and c_return events, which we can't support
+ // See :attr-tracing:
+ gen_counter_incr(jit, asm, Counter::send_cfunc_tracing);
+ return None;
+ }
+
// This is a .send call and we need to adjust the stack
if flags & VM_CALL_OPT_SEND != 0 {
handle_opt_send_shift_stack(asm, argc);
@@ -7085,14 +8931,12 @@ fn gen_struct_aref(
let ret = asm.stack_push(Type::Unknown);
asm.mov(ret, val);
- jump_to_next_insn(jit, asm, ocb);
- Some(EndBlock)
+ jump_to_next_insn(jit, asm)
}
fn gen_struct_aset(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
ci: *const rb_callinfo,
cme: *const rb_callable_method_entry_t,
comptime_recv: VALUE,
@@ -7103,6 +8947,19 @@ fn gen_struct_aset(
return None;
}
+ // If the comptime receiver is frozen, writing a struct member will raise an exception
+ // and we don't want to JIT code to deal with that situation.
+ if comptime_recv.is_frozen() {
+ return None;
+ }
+
+ if c_method_tracing_currently_enabled(jit) {
+        // Struct accesses need to fire c_call and c_return events, which we can't support
+ // See :attr-tracing:
+ gen_counter_incr(jit, asm, Counter::send_cfunc_tracing);
+ return None;
+ }
+
// This is a .send call and we need to adjust the stack
if flags & VM_CALL_OPT_SEND != 0 {
handle_opt_send_shift_stack(asm, argc);
@@ -7116,6 +8973,17 @@ fn gen_struct_aset(
assert!(unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRUCT) });
assert!((off as i64) < unsafe { RSTRUCT_LEN(comptime_recv) });
+    // Even if the comptime recv was not frozen, a future recv may be. So we need to emit a guard
+ // that the recv is not frozen.
+ // We know all structs are heap objects, so we can check the flag directly.
+ let recv = asm.stack_opnd(1);
+ let recv = asm.load(recv);
+ let flags = asm.load(Opnd::mem(VALUE_BITS, recv, RUBY_OFFSET_RBASIC_FLAGS));
+ asm.test(flags, (RUBY_FL_FREEZE as u64).into());
+ asm.jnz(Target::side_exit(Counter::opt_aset_frozen));
+
+ // Not frozen, so we can proceed.
+
asm_comment!(asm, "struct aset");
let val = asm.stack_pop(1);
@@ -7126,8 +8994,7 @@ fn gen_struct_aset(
let ret = asm.stack_push(Type::Unknown);
asm.mov(ret, val);
- jump_to_next_insn(jit, asm, ocb);
- Some(EndBlock)
+ jump_to_next_insn(jit, asm)
}
// Generate code that calls a method with dynamic dispatch
@@ -7142,9 +9009,14 @@ fn gen_send_dynamic<F: Fn(&mut Assembler) -> Opnd>(
if unsafe { vm_ci_flag((*cd).ci) } & VM_CALL_TAILCALL != 0 {
return None;
}
+ jit_perf_symbol_push!(jit, asm, "gen_send_dynamic", PerfMap::Codegen);
+
+    // Rewind stack_size using ctx.with_stack_size so that stack_size changes made
+    // by specialized paths before they returned None are undone here.
+ asm.ctx = asm.ctx.with_stack_size(jit.stack_size_for_pc);
// Save PC and SP to prepare for dynamic dispatch
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
// Dispatch a method
let ret = vm_sendish(asm);
@@ -7159,14 +9031,17 @@ fn gen_send_dynamic<F: Fn(&mut Assembler) -> Opnd>(
// Fix the interpreter SP deviated by vm_sendish
asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), SP);
- gen_counter_incr(asm, Counter::num_send_dynamic);
- Some(KeepCompiling)
+ gen_counter_incr(jit, asm, Counter::num_send_dynamic);
+
+ jit_perf_symbol_pop!(jit, asm, PerfMap::Codegen);
+
+    // End the current block for invalidation and sharing the same successor
+ jump_to_next_insn(jit, asm)
}
fn gen_send_general(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
cd: *const rb_call_data,
block: Option<BlockHandler>,
) -> Option<CodegenStatus> {
@@ -7185,16 +9060,17 @@ fn gen_send_general(
let mut mid = unsafe { vm_ci_mid(ci) };
let mut flags = unsafe { vm_ci_flag(ci) };
- // Don't JIT calls with keyword splat
- if flags & VM_CALL_KW_SPLAT != 0 {
- gen_counter_incr(asm, Counter::send_kw_splat);
- return None;
+ // Defer compilation so we can specialize on class of receiver
+ if !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
}
- // Defer compilation so we can specialize on class of receiver
- if !jit.at_current_insn() {
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ let ci_flags = unsafe { vm_ci_flag(ci) };
+
+ // Dynamic stack layout. No good way to support without inlining.
+ if ci_flags & VM_CALL_FORWARDING != 0 {
+ gen_counter_incr(jit, asm, Counter::send_forwarding);
+ return None;
}
let recv_idx = argc + if flags & VM_CALL_ARGS_BLOCKARG != 0 { 1 } else { 0 };
@@ -7203,56 +9079,59 @@ fn gen_send_general(
assert_eq!(RUBY_T_CLASS, comptime_recv_klass.builtin_type(),
"objects visible to ruby code should have a T_CLASS in their klass field");
+ // Don't compile calls through singleton classes to avoid retaining the receiver.
+    // Make an exception for class methods since classes tend to be retained anyway.
+ // Also compile calls on top_self to help tests.
+ if VALUE(0) != unsafe { FL_TEST(comptime_recv_klass, VALUE(RUBY_FL_SINGLETON as usize)) }
+ && comptime_recv != unsafe { rb_vm_top_self() }
+ && !unsafe { RB_TYPE_P(comptime_recv, RUBY_T_CLASS) }
+ && !unsafe { RB_TYPE_P(comptime_recv, RUBY_T_MODULE) } {
+ gen_counter_incr(jit, asm, Counter::send_singleton_class);
+ return None;
+ }
+
// Points to the receiver operand on the stack
let recv = asm.stack_opnd(recv_idx);
let recv_opnd: YARVOpnd = recv.into();
// Log the name of the method we're calling to
- #[cfg(feature = "disasm")]
- {
- let class_name = unsafe { cstr_to_rust_string(rb_class2name(comptime_recv_klass)) };
- let method_name = unsafe { cstr_to_rust_string(rb_id2name(mid)) };
- match (class_name, method_name) {
- (Some(class_name), Some(method_name)) => {
- asm_comment!(asm, "call to {}#{}", class_name, method_name);
- }
- _ => {}
- }
- }
+ asm_comment!(asm, "call to {}", get_method_name(Some(comptime_recv_klass), mid));
// Gather some statistics about sends
- gen_counter_incr(asm, Counter::num_send);
+ gen_counter_incr(jit, asm, Counter::num_send);
if let Some(_known_klass) = asm.ctx.get_opnd_type(recv_opnd).known_class() {
- gen_counter_incr(asm, Counter::num_send_known_class);
+ gen_counter_incr(jit, asm, Counter::num_send_known_class);
}
if asm.ctx.get_chain_depth() > 1 {
- gen_counter_incr(asm, Counter::num_send_polymorphic);
+ gen_counter_incr(jit, asm, Counter::num_send_polymorphic);
}
// If megamorphic, let the caller fallback to dynamic dispatch
- if asm.ctx.get_chain_depth() as i32 >= SEND_MAX_DEPTH {
- gen_counter_incr(asm, Counter::send_megamorphic);
+ if asm.ctx.get_chain_depth() >= SEND_MAX_DEPTH {
+ gen_counter_incr(jit, asm, Counter::send_megamorphic);
return None;
}
- jit_guard_known_klass(
+ perf_call!("gen_send_general: ", jit_guard_known_klass(
jit,
asm,
- ocb,
- comptime_recv_klass,
recv,
recv_opnd,
comptime_recv,
SEND_MAX_DEPTH,
Counter::guard_send_klass_megamorphic,
- );
+ ));
// Do method lookup
let mut cme = unsafe { rb_callable_method_entry(comptime_recv_klass, mid) };
if cme.is_null() {
- gen_counter_incr(asm, Counter::send_cme_not_found);
+ gen_counter_incr(jit, asm, Counter::send_cme_not_found);
return None;
}
+ // Load an overloaded cme if applicable. See vm_search_cc().
+ // It allows you to use a faster ISEQ if possible.
+ cme = unsafe { rb_check_overloaded_cme(cme, ci) };
+
let visi = unsafe { METHOD_ENTRY_VISI(cme) };
match visi {
METHOD_VISI_PUBLIC => {
@@ -7262,7 +9141,7 @@ fn gen_send_general(
if flags & VM_CALL_FCALL == 0 {
// Can only call private methods with FCALL callsites.
// (at the moment they are callsites without a receiver or an explicit `self` receiver)
- gen_counter_incr(asm, Counter::send_private_not_fcall);
+ gen_counter_incr(jit, asm, Counter::send_private_not_fcall);
return None;
}
}
@@ -7281,7 +9160,7 @@ fn gen_send_general(
// Register block for invalidation
//assert!(cme->called_id == mid);
- jit.assume_method_lookup_stable(asm, ocb, cme);
+ jit.assume_method_lookup_stable(asm, cme);
// To handle the aliased method case (VM_METHOD_TYPE_ALIAS)
loop {
@@ -7291,37 +9170,58 @@ fn gen_send_general(
VM_METHOD_TYPE_ISEQ => {
let iseq = unsafe { get_def_iseq_ptr((*cme).def) };
let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL;
- return gen_send_iseq(jit, asm, ocb, iseq, ci, frame_type, None, cme, block, flags, argc, None);
+ return perf_call! { gen_send_iseq(jit, asm, iseq, ci, frame_type, None, cme, block, flags, argc, None) };
}
VM_METHOD_TYPE_CFUNC => {
- return gen_send_cfunc(
+ return perf_call! { gen_send_cfunc(
jit,
asm,
- ocb,
ci,
cme,
block,
- &comptime_recv_klass,
+ Some(comptime_recv_klass),
flags,
argc,
- );
+ ) };
}
VM_METHOD_TYPE_IVAR => {
- if flags & VM_CALL_ARGS_SPLAT != 0 {
- gen_counter_incr(asm, Counter::send_args_splat_ivar);
+ // This is a .send call not supported right now for attr_reader
+ if flags & VM_CALL_OPT_SEND != 0 {
+ gen_counter_incr(jit, asm, Counter::send_send_attr_reader);
return None;
}
- if argc != 0 {
- // Argument count mismatch. Getters take no arguments.
- gen_counter_incr(asm, Counter::send_getter_arity);
- return None;
+ if flags & VM_CALL_ARGS_BLOCKARG != 0 {
+ match asm.ctx.get_opnd_type(StackOpnd(0)) {
+ Type::Nil | Type::BlockParamProxy => {
+ // Getters ignore the block arg, and these types of block args can be
+ // passed without side-effect (never any `to_proc` call).
+ asm.stack_pop(1);
+ }
+ _ => {
+ gen_counter_incr(jit, asm, Counter::send_getter_block_arg);
+ return None;
+ }
+ }
}
- // This is a .send call not supported right now for getters
- if flags & VM_CALL_OPT_SEND != 0 {
- gen_counter_incr(asm, Counter::send_send_getter);
- return None;
+ if argc != 0 {
+ // Guard for simple splat of empty array
+ if VM_CALL_ARGS_SPLAT == flags & (VM_CALL_ARGS_SPLAT | VM_CALL_KWARG | VM_CALL_KW_SPLAT)
+ && argc == 1 {
+ // Not using chain guards since on failure these likely end up just raising
+ // ArgumentError
+ let splat = asm.stack_opnd(0);
+ guard_object_is_array(asm, splat, splat.into(), Counter::guard_send_getter_splat_non_empty);
+ let splat_len = get_array_len(asm, splat);
+ asm.cmp(splat_len, 0.into());
+ asm.jne(Target::side_exit(Counter::guard_send_getter_splat_non_empty));
+ asm.stack_pop(1);
+ } else {
+ // Argument count mismatch. Getters take no arguments.
+ gen_counter_incr(jit, asm, Counter::send_getter_arity);
+ return None;
+ }
}
if c_method_tracing_currently_enabled(jit) {
@@ -7330,63 +9230,61 @@ fn gen_send_general(
// Handling the C method tracing events for attr_accessor
// methods is easier than regular C methods as we know the
// "method" we are calling into never enables those tracing
- // events. Once global invalidation runs, the code for the
- // attr_accessor is invalidated and we exit at the closest
- // instruction boundary which is always outside of the body of
- // the attr_accessor code.
- gen_counter_incr(asm, Counter::send_cfunc_tracing);
+ // events. We are never inside the code that needs to be
+ // invalidated when invalidation happens.
+ gen_counter_incr(jit, asm, Counter::send_cfunc_tracing);
return None;
}
+ let recv = asm.stack_opnd(0); // the receiver should now be the stack top
let ivar_name = unsafe { get_cme_def_body_attr_id(cme) };
- if flags & VM_CALL_ARGS_BLOCKARG != 0 {
- gen_counter_incr(asm, Counter::send_getter_block_arg);
- return None;
- }
-
return gen_get_ivar(
jit,
asm,
- ocb,
SEND_MAX_DEPTH,
comptime_recv,
ivar_name,
recv,
- recv_opnd,
+ recv.into(),
);
}
VM_METHOD_TYPE_ATTRSET => {
+ // This is a .send call not supported right now for attr_writer
+ if flags & VM_CALL_OPT_SEND != 0 {
+ gen_counter_incr(jit, asm, Counter::send_send_attr_writer);
+ return None;
+ }
if flags & VM_CALL_ARGS_SPLAT != 0 {
- gen_counter_incr(asm, Counter::send_args_splat_attrset);
+ gen_counter_incr(jit, asm, Counter::send_args_splat_attrset);
return None;
}
if flags & VM_CALL_KWARG != 0 {
- gen_counter_incr(asm, Counter::send_attrset_kwargs);
+ gen_counter_incr(jit, asm, Counter::send_attrset_kwargs);
return None;
} else if argc != 1 || unsafe { !RB_TYPE_P(comptime_recv, RUBY_T_OBJECT) } {
- gen_counter_incr(asm, Counter::send_ivar_set_method);
+ gen_counter_incr(jit, asm, Counter::send_ivar_set_method);
return None;
} else if c_method_tracing_currently_enabled(jit) {
// Can't generate code for firing c_call and c_return events
// See :attr-tracing:
- gen_counter_incr(asm, Counter::send_cfunc_tracing);
+ gen_counter_incr(jit, asm, Counter::send_cfunc_tracing);
return None;
} else if flags & VM_CALL_ARGS_BLOCKARG != 0 {
- gen_counter_incr(asm, Counter::send_attrset_block_arg);
+ gen_counter_incr(jit, asm, Counter::send_attrset_block_arg);
return None;
} else {
let ivar_name = unsafe { get_cme_def_body_attr_id(cme) };
- return gen_set_ivar(jit, asm, ivar_name, flags, argc);
+ return gen_set_ivar(jit, asm, comptime_recv, ivar_name, StackOpnd(1), None);
}
}
// Block method, e.g. define_method(:foo) { :my_block }
VM_METHOD_TYPE_BMETHOD => {
if flags & VM_CALL_ARGS_SPLAT != 0 {
- gen_counter_incr(asm, Counter::send_args_splat_bmethod);
+ gen_counter_incr(jit, asm, Counter::send_args_splat_bmethod);
return None;
}
- return gen_send_bmethod(jit, asm, ocb, ci, cme, block, flags, argc);
+ return gen_send_bmethod(jit, asm, ci, cme, block, flags, argc);
}
VM_METHOD_TYPE_ALIAS => {
// Retrieve the aliased method and re-enter the switch
@@ -7396,7 +9294,7 @@ fn gen_send_general(
// Send family of methods, e.g. call/apply
VM_METHOD_TYPE_OPTIMIZED => {
if flags & VM_CALL_ARGS_BLOCKARG != 0 {
- gen_counter_incr(asm, Counter::send_optimized_block_arg);
+ gen_counter_incr(jit, asm, Counter::send_optimized_block_arg);
return None;
}
@@ -7414,12 +9312,12 @@ fn gen_send_general(
// currently work, we can't do stack manipulation until we will no longer
// side exit.
if flags & VM_CALL_OPT_SEND != 0 {
- gen_counter_incr(asm, Counter::send_send_nested);
+ gen_counter_incr(jit, asm, Counter::send_send_nested);
return None;
}
if argc == 0 {
- gen_counter_incr(asm, Counter::send_send_wrong_args);
+ gen_counter_incr(jit, asm, Counter::send_send_wrong_args);
return None;
}
@@ -7427,69 +9325,39 @@ fn gen_send_general(
let compile_time_name = jit.peek_at_stack(&asm.ctx, argc as isize);
- if !compile_time_name.string_p() && !compile_time_name.static_sym_p() {
- gen_counter_incr(asm, Counter::send_send_chain_not_string_or_sym);
- return None;
- }
-
mid = unsafe { rb_get_symbol_id(compile_time_name) };
if mid == 0 {
- gen_counter_incr(asm, Counter::send_send_null_mid);
+ // This also rejects method names that need conversion
+ gen_counter_incr(jit, asm, Counter::send_send_null_mid);
return None;
}
cme = unsafe { rb_callable_method_entry(comptime_recv_klass, mid) };
if cme.is_null() {
- gen_counter_incr(asm, Counter::send_send_null_cme);
+ gen_counter_incr(jit, asm, Counter::send_send_null_cme);
return None;
}
flags |= VM_CALL_FCALL | VM_CALL_OPT_SEND;
- jit.assume_method_lookup_stable(asm, ocb, cme);
-
- let (known_class, type_mismatch_counter) = {
- if compile_time_name.string_p() {
- (
- unsafe { rb_cString },
- Counter::guard_send_send_chain_not_string,
- )
- } else {
- (
- unsafe { rb_cSymbol },
- Counter::guard_send_send_chain_not_sym,
- )
- }
- };
+ jit.assume_method_lookup_stable(asm, cme);
- let name_opnd = asm.stack_opnd(argc);
- jit_guard_known_klass(
- jit,
+ asm_comment!(
asm,
- ocb,
- known_class,
- name_opnd,
- name_opnd.into(),
- compile_time_name,
- 2, // We have string or symbol, so max depth is 2
- type_mismatch_counter
+ "guard sending method name \'{}\'",
+ unsafe { cstr_to_rust_string(rb_id2name(mid)) }.unwrap_or_else(|| "<unknown>".to_owned()),
);
- // Need to do this here so we don't have too many live
- // values for the register allocator.
- let name_opnd = asm.load(name_opnd);
-
+ let name_opnd = asm.stack_opnd(argc);
let symbol_id_opnd = asm.ccall(rb_get_symbol_id as *const u8, vec![name_opnd]);
- asm_comment!(asm, "chain_guard_send");
asm.cmp(symbol_id_opnd, mid.into());
jit_chain_guard(
JCC_JNE,
jit,
asm,
- ocb,
SEND_MAX_DEPTH,
- Counter::guard_send_send_chain,
+ Counter::guard_send_send_name_chain,
);
// We have changed the argc, flags, mid, and cme, so we need to re-enter the match
@@ -7498,26 +9366,18 @@ fn gen_send_general(
}
OPTIMIZED_METHOD_TYPE_CALL => {
-
if block.is_some() {
- gen_counter_incr(asm, Counter::send_call_block);
+ gen_counter_incr(jit, asm, Counter::send_call_block);
return None;
}
if flags & VM_CALL_KWARG != 0 {
- gen_counter_incr(asm, Counter::send_call_kwarg);
+ gen_counter_incr(jit, asm, Counter::send_call_kwarg);
return None;
}
if flags & VM_CALL_ARGS_SPLAT != 0 {
- gen_counter_incr(asm, Counter::send_args_splat_opt_call);
- return None;
- }
-
- // Optimize for single ractor mode and avoid runtime check for
- // "defined with an un-shareable Proc in a different Ractor"
- if !assume_single_ractor_mode(jit, asm, ocb) {
- gen_counter_incr(asm, Counter::send_call_multi_ractor);
+ gen_counter_incr(jit, asm, Counter::send_args_splat_opt_call);
return None;
}
@@ -7532,7 +9392,7 @@ fn gen_send_general(
let sp = asm.lea(asm.ctx.sp_opnd(0));
// Save the PC and SP because the callee can make Ruby calls
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
let kw_splat = flags & VM_CALL_KW_SPLAT;
let stack_argument_pointer = asm.lea(Opnd::mem(64, sp, -(argc) * SIZEOF_VALUE_I32));
@@ -7550,22 +9410,22 @@ fn gen_send_general(
let stack_ret = asm.stack_push(Type::Unknown);
asm.mov(stack_ret, ret);
- return Some(KeepCompiling);
+ // End the block to allow invalidating the next instruction
+ return jump_to_next_insn(jit, asm);
}
OPTIMIZED_METHOD_TYPE_BLOCK_CALL => {
- gen_counter_incr(asm, Counter::send_optimized_method_block_call);
+ gen_counter_incr(jit, asm, Counter::send_optimized_method_block_call);
return None;
}
OPTIMIZED_METHOD_TYPE_STRUCT_AREF => {
if flags & VM_CALL_ARGS_SPLAT != 0 {
- gen_counter_incr(asm, Counter::send_args_splat_aref);
+ gen_counter_incr(jit, asm, Counter::send_args_splat_aref);
return None;
}
return gen_struct_aref(
jit,
asm,
- ocb,
ci,
cme,
comptime_recv,
@@ -7575,13 +9435,12 @@ fn gen_send_general(
}
OPTIMIZED_METHOD_TYPE_STRUCT_ASET => {
if flags & VM_CALL_ARGS_SPLAT != 0 {
- gen_counter_incr(asm, Counter::send_args_splat_aset);
+ gen_counter_incr(jit, asm, Counter::send_args_splat_aset);
return None;
}
return gen_struct_aset(
jit,
asm,
- ocb,
ci,
cme,
comptime_recv,
@@ -7595,23 +9454,23 @@ fn gen_send_general(
}
}
VM_METHOD_TYPE_ZSUPER => {
- gen_counter_incr(asm, Counter::send_zsuper_method);
+ gen_counter_incr(jit, asm, Counter::send_zsuper_method);
return None;
}
VM_METHOD_TYPE_UNDEF => {
- gen_counter_incr(asm, Counter::send_undef_method);
+ gen_counter_incr(jit, asm, Counter::send_undef_method);
return None;
}
VM_METHOD_TYPE_NOTIMPLEMENTED => {
- gen_counter_incr(asm, Counter::send_not_implemented_method);
+ gen_counter_incr(jit, asm, Counter::send_not_implemented_method);
return None;
}
VM_METHOD_TYPE_MISSING => {
- gen_counter_incr(asm, Counter::send_missing_method);
+ gen_counter_incr(jit, asm, Counter::send_missing_method);
return None;
}
VM_METHOD_TYPE_REFINED => {
- gen_counter_incr(asm, Counter::send_refined_method);
+ gen_counter_incr(jit, asm, Counter::send_refined_method);
return None;
}
_ => {
@@ -7621,6 +9480,35 @@ fn gen_send_general(
}
}
+/// Get class name from a class pointer.
+fn get_class_name(class: Option<VALUE>) -> String {
+ class.filter(|&class| {
+ // type checks for rb_class2name()
+ unsafe { RB_TYPE_P(class, RUBY_T_MODULE) || RB_TYPE_P(class, RUBY_T_CLASS) }
+ }).and_then(|class| unsafe {
+ cstr_to_rust_string(rb_class2name(class))
+ }).unwrap_or_else(|| "Unknown".to_string())
+}
+
+/// Assemble "{class_name}#{method_name}" from a class pointer and a method ID
+fn get_method_name(class: Option<VALUE>, mid: u64) -> String {
+ let class_name = get_class_name(class);
+ let method_name = if mid != 0 {
+ unsafe { cstr_to_rust_string(rb_id2name(mid)) }
+ } else {
+ None
+ }.unwrap_or_else(|| "Unknown".to_string());
+ format!("{}#{}", class_name, method_name)
+}
+
+/// Assemble "{label}@{iseq_path}:{lineno}" (iseq_inspect() format) from an ISEQ
+fn get_iseq_name(iseq: IseqPtr) -> String {
+ let c_string = unsafe { rb_yjit_iseq_inspect(iseq) };
+ let string = unsafe { CStr::from_ptr(c_string) }.to_str()
+ .unwrap_or_else(|_| "not UTF-8").to_string();
+ unsafe { ruby_xfree(c_string as *mut c_void); }
+ string
+}
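A quick sketch of the naming format these helpers produce, with the CRuby lookups replaced by plain `Option<&str>` inputs (hypothetical helper, for illustration only):

```rust
fn format_method_name(class_name: Option<&str>, method_name: Option<&str>) -> String {
    format!(
        "{}#{}",
        class_name.unwrap_or("Unknown"),
        method_name.unwrap_or("Unknown"),
    )
}

// format_method_name(Some("String"), Some("length")) == "String#length"
// format_method_name(None, Some("foo"))              == "Unknown#foo"
```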
/// Shifts the stack for send in order to remove the name of the method
/// Comment below borrow from vm_call_opt_send in vm_insnhelper.c
@@ -7650,11 +9538,10 @@ fn handle_opt_send_shift_stack(asm: &mut Assembler, argc: i32) {
fn gen_opt_send_without_block(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Generate specialized code if possible
let cd = jit.get_arg(0).as_ptr();
- if let Some(status) = gen_send_general(jit, asm, ocb, cd, None) {
+ if let Some(status) = perf_call! { gen_send_general(jit, asm, cd, None) } {
return Some(status);
}
@@ -7673,12 +9560,11 @@ fn gen_opt_send_without_block(
fn gen_send(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Generate specialized code if possible
let cd = jit.get_arg(0).as_ptr();
let block = jit.get_arg(1).as_optional_ptr().map(|iseq| BlockHandler::BlockISeq(iseq));
- if let Some(status) = gen_send_general(jit, asm, ocb, cd, block) {
+ if let Some(status) = perf_call! { gen_send_general(jit, asm, cd, block) } {
return Some(status);
}
@@ -7695,14 +9581,37 @@ fn gen_send(
})
}
+fn gen_sendforward(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+) -> Option<CodegenStatus> {
+ // Generate specialized code if possible
+ let cd = jit.get_arg(0).as_ptr();
+ let block = jit.get_arg(1).as_optional_ptr().map(|iseq| BlockHandler::BlockISeq(iseq));
+ if let Some(status) = perf_call! { gen_send_general(jit, asm, cd, block) } {
+ return Some(status);
+ }
+
+ // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of sendforward
+ let blockiseq = jit.get_arg(1).as_iseq();
+ gen_send_dynamic(jit, asm, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| {
+ extern "C" {
+ fn rb_vm_sendforward(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE;
+ }
+ asm.ccall(
+ rb_vm_sendforward as *const u8,
+ vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()],
+ )
+ })
+}
+
fn gen_invokeblock(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Generate specialized code if possible
let cd = jit.get_arg(0).as_ptr();
- if let Some(status) = gen_invokeblock_specialized(jit, asm, ocb, cd) {
+ if let Some(status) = gen_invokeblock_specialized(jit, asm, cd) {
return Some(status);
}
@@ -7721,17 +9630,15 @@ fn gen_invokeblock(
fn gen_invokeblock_specialized(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
cd: *const rb_call_data,
) -> Option<CodegenStatus> {
- if !jit.at_current_insn() {
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ if !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
}
// Fallback to dynamic dispatch if this callsite is megamorphic
- if asm.ctx.get_chain_depth() as i32 >= SEND_MAX_DEPTH {
- gen_counter_incr(asm, Counter::invokeblock_megamorphic);
+ if asm.ctx.get_chain_depth() >= SEND_MAX_DEPTH {
+ gen_counter_incr(jit, asm, Counter::invokeblock_megamorphic);
return None;
}
@@ -7747,7 +9654,7 @@ fn gen_invokeblock_specialized(
// Handle each block_handler type
if comptime_handler.0 == VM_BLOCK_HANDLER_NONE as usize { // no block given
- gen_counter_incr(asm, Counter::invokeblock_none);
+ gen_counter_incr(jit, asm, Counter::invokeblock_none);
None
} else if comptime_handler.0 & 0x3 == 0x1 { // VM_BH_ISEQ_BLOCK_P
asm_comment!(asm, "get local EP");
@@ -7763,11 +9670,17 @@ fn gen_invokeblock_specialized(
JCC_JNE,
jit,
asm,
- ocb,
SEND_MAX_DEPTH,
Counter::guard_invokeblock_tag_changed,
);
+ // If the current ISEQ is annotated to be inlined but it's not being inlined here,
+ // generate a dynamic dispatch to avoid making this yield megamorphic.
+ if unsafe { rb_jit_iseq_builtin_attrs(jit.iseq) } & BUILTIN_ATTR_INLINE_BLOCK != 0 && !asm.ctx.inline() {
+ gen_counter_incr(jit, asm, Counter::invokeblock_iseq_not_inlined);
+ return None;
+ }
+
let comptime_captured = unsafe { ((comptime_handler.0 & !0x3) as *const rb_captured_block).as_ref().unwrap() };
let comptime_iseq = unsafe { *comptime_captured.code.iseq.as_ref() };
@@ -7779,33 +9692,19 @@ fn gen_invokeblock_specialized(
JCC_JNE,
jit,
asm,
- ocb,
SEND_MAX_DEPTH,
Counter::guard_invokeblock_iseq_block_changed,
);
- gen_send_iseq(
- jit,
- asm,
- ocb,
- comptime_iseq,
- ci,
- VM_FRAME_MAGIC_BLOCK,
- None,
- 0 as _,
- None,
- flags,
- argc,
- Some(captured_opnd),
- )
+ perf_call! { gen_send_iseq(jit, asm, comptime_iseq, ci, VM_FRAME_MAGIC_BLOCK, None, 0 as _, None, flags, argc, Some(captured_opnd)) }
} else if comptime_handler.0 & 0x3 == 0x3 { // VM_BH_IFUNC_P
// We aren't handling CALLER_SETUP_ARG and CALLER_REMOVE_EMPTY_KW_SPLAT yet.
if flags & VM_CALL_ARGS_SPLAT != 0 {
- gen_counter_incr(asm, Counter::invokeblock_ifunc_args_splat);
+ gen_counter_incr(jit, asm, Counter::invokeblock_ifunc_args_splat);
return None;
}
if flags & VM_CALL_KW_SPLAT != 0 {
- gen_counter_incr(asm, Counter::invokeblock_ifunc_kw_splat);
+ gen_counter_incr(jit, asm, Counter::invokeblock_ifunc_kw_splat);
return None;
}
@@ -7822,20 +9721,19 @@ fn gen_invokeblock_specialized(
JCC_JNE,
jit,
asm,
- ocb,
SEND_MAX_DEPTH,
Counter::guard_invokeblock_tag_changed,
);
// The cfunc may not be leaf
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
extern "C" {
fn rb_vm_yield_with_cfunc(ec: EcPtr, captured: *const rb_captured_block, argc: c_int, argv: *const VALUE) -> VALUE;
}
asm_comment!(asm, "call ifunc");
let captured_opnd = asm.and(block_handler_opnd, Opnd::Imm(!0x3));
- let argv = asm.lea(asm.ctx.sp_opnd((-argc * SIZEOF_VALUE_I32) as isize));
+ let argv = asm.lea(asm.ctx.sp_opnd(-argc));
let ret = asm.ccall(
rb_vm_yield_with_cfunc as *const u8,
vec![EC, captured_opnd, argc.into(), argv],
@@ -7846,16 +9744,15 @@ fn gen_invokeblock_specialized(
asm.mov(stack_ret, ret);
// cfunc calls may corrupt types
- asm.ctx.clear_local_types();
+ asm.clear_local_types();
// Share the successor with other chains
- jump_to_next_insn(jit, asm, ocb);
- Some(EndBlock)
+ jump_to_next_insn(jit, asm)
} else if comptime_handler.symbol_p() {
- gen_counter_incr(asm, Counter::invokeblock_symbol);
+ gen_counter_incr(jit, asm, Counter::invokeblock_symbol);
None
} else { // Proc
- gen_counter_incr(asm, Counter::invokeblock_proc);
+ gen_counter_incr(jit, asm, Counter::invokeblock_proc);
None
}
}
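The branches above key off the low two bits of the block handler word. A standalone sketch of that classification (hypothetical enum; the tag values mirror the VM_BH_ISEQ_BLOCK_P and VM_BH_IFUNC_P checks above):

```rust
#[derive(Debug, PartialEq)]
enum BlockHandlerKind { None, IseqBlock, Ifunc, SymbolOrProc }

fn classify_block_handler(raw: usize, vm_block_handler_none: usize) -> BlockHandlerKind {
    if raw == vm_block_handler_none {
        BlockHandlerKind::None
    } else if raw & 0x3 == 0x1 {
        BlockHandlerKind::IseqBlock // tagged pointer to an rb_captured_block
    } else if raw & 0x3 == 0x3 {
        BlockHandlerKind::Ifunc // tagged pointer to a C-implemented block (ifunc)
    } else {
        BlockHandlerKind::SymbolOrProc // an ordinary VALUE: Symbol or Proc object
    }
}
```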
@@ -7863,15 +9760,14 @@ fn gen_invokeblock_specialized(
fn gen_invokesuper(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Generate specialized code if possible
let cd = jit.get_arg(0).as_ptr();
- if let Some(status) = gen_invokesuper_specialized(jit, asm, ocb, cd) {
+ if let Some(status) = gen_invokesuper_specialized(jit, asm, cd) {
return Some(status);
}
- // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of send
+ // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of invokesuper
let blockiseq = jit.get_arg(1).as_iseq();
gen_send_dynamic(jit, asm, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| {
extern "C" {
@@ -7884,16 +9780,37 @@ fn gen_invokesuper(
})
}
+fn gen_invokesuperforward(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+) -> Option<CodegenStatus> {
+ // Generate specialized code if possible
+ let cd = jit.get_arg(0).as_ptr();
+ if let Some(status) = gen_invokesuper_specialized(jit, asm, cd) {
+ return Some(status);
+ }
+
+ // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of invokesuperforward
+ let blockiseq = jit.get_arg(1).as_iseq();
+ gen_send_dynamic(jit, asm, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| {
+ extern "C" {
+ fn rb_vm_invokesuperforward(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE;
+ }
+ asm.ccall(
+ rb_vm_invokesuperforward as *const u8,
+ vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()],
+ )
+ })
+}
+
fn gen_invokesuper_specialized(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
cd: *const rb_call_data,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on class of receiver
- if !jit.at_current_insn() {
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ if !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
}
// Handle the last two branches of vm_caller_setup_arg_block
@@ -7904,14 +9821,14 @@ fn gen_invokesuper_specialized(
};
// Fallback to dynamic dispatch if this callsite is megamorphic
- if asm.ctx.get_chain_depth() as i32 >= SEND_MAX_DEPTH {
- gen_counter_incr(asm, Counter::invokesuper_megamorphic);
+ if asm.ctx.get_chain_depth() >= SEND_MAX_DEPTH {
+ gen_counter_incr(jit, asm, Counter::invokesuper_megamorphic);
return None;
}
let me = unsafe { rb_vm_frame_method_entry(jit.get_cfp()) };
if me.is_null() {
- gen_counter_incr(asm, Counter::invokesuper_no_me);
+ gen_counter_incr(jit, asm, Counter::invokesuper_no_me);
return None;
}
@@ -7924,7 +9841,7 @@ fn gen_invokesuper_specialized(
if current_defined_class.builtin_type() == RUBY_T_ICLASS
&& unsafe { RB_TYPE_P((*rbasic_ptr).klass, RUBY_T_MODULE) && FL_TEST_RAW((*rbasic_ptr).klass, VALUE(RMODULE_IS_REFINEMENT.as_usize())) != VALUE(0) }
{
- gen_counter_incr(asm, Counter::invokesuper_refinement);
+ gen_counter_incr(jit, asm, Counter::invokesuper_refinement);
return None;
}
let comptime_superclass =
@@ -7939,11 +9856,15 @@ fn gen_invokesuper_specialized(
// Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
if ci_flags & VM_CALL_KWARG != 0 {
- gen_counter_incr(asm, Counter::invokesuper_kwarg);
+ gen_counter_incr(jit, asm, Counter::invokesuper_kwarg);
return None;
}
if ci_flags & VM_CALL_KW_SPLAT != 0 {
- gen_counter_incr(asm, Counter::invokesuper_kw_splat);
+ gen_counter_incr(jit, asm, Counter::invokesuper_kw_splat);
+ return None;
+ }
+ if ci_flags & VM_CALL_FORWARDING != 0 {
+ gen_counter_incr(jit, asm, Counter::invokesuper_forwarding);
return None;
}
@@ -7954,14 +9875,20 @@ fn gen_invokesuper_specialized(
// check and side exit.
let comptime_recv = jit.peek_at_stack(&asm.ctx, argc as isize);
if unsafe { rb_obj_is_kind_of(comptime_recv, current_defined_class) } == VALUE(0) {
- gen_counter_incr(asm, Counter::invokesuper_defined_class_mismatch);
+ gen_counter_incr(jit, asm, Counter::invokesuper_defined_class_mismatch);
+ return None;
+ }
+
+ // Don't compile `super` on objects with singleton class to avoid retaining the receiver.
+ if VALUE(0) != unsafe { FL_TEST(comptime_recv.class_of(), VALUE(RUBY_FL_SINGLETON as usize)) } {
+ gen_counter_incr(jit, asm, Counter::invokesuper_singleton_class);
return None;
}
// Do method lookup
let cme = unsafe { rb_callable_method_entry(comptime_superclass, mid) };
if cme.is_null() {
- gen_counter_incr(asm, Counter::invokesuper_no_cme);
+ gen_counter_incr(jit, asm, Counter::invokesuper_no_cme);
return None;
}
@@ -7969,7 +9896,7 @@ fn gen_invokesuper_specialized(
let cme_def_type = unsafe { get_cme_def_type(cme) };
if cme_def_type != VM_METHOD_TYPE_ISEQ && cme_def_type != VM_METHOD_TYPE_CFUNC {
// others unimplemented
- gen_counter_incr(asm, Counter::invokesuper_not_iseq_or_cfunc);
+ gen_counter_incr(jit, asm, Counter::invokesuper_not_iseq_or_cfunc);
return None;
}
@@ -7987,27 +9914,26 @@ fn gen_invokesuper_specialized(
JCC_JNE,
jit,
asm,
- ocb,
SEND_MAX_DEPTH,
Counter::guard_invokesuper_me_changed,
);
// We need to assume that both our current method entry and the super
// method entry we invoke remain stable
- jit.assume_method_lookup_stable(asm, ocb, me);
- jit.assume_method_lookup_stable(asm, ocb, cme);
+ jit.assume_method_lookup_stable(asm, me);
+ jit.assume_method_lookup_stable(asm, cme);
// Method calls may corrupt types
- asm.ctx.clear_local_types();
+ asm.clear_local_types();
match cme_def_type {
VM_METHOD_TYPE_ISEQ => {
let iseq = unsafe { get_def_iseq_ptr((*cme).def) };
let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL;
- gen_send_iseq(jit, asm, ocb, iseq, ci, frame_type, None, cme, Some(block), ci_flags, argc, None)
+ perf_call! { gen_send_iseq(jit, asm, iseq, ci, frame_type, None, cme, Some(block), ci_flags, argc, None) }
}
VM_METHOD_TYPE_CFUNC => {
- gen_send_cfunc(jit, asm, ocb, ci, cme, Some(block), ptr::null(), ci_flags, argc)
+ perf_call! { gen_send_cfunc(jit, asm, ci, cme, Some(block), None, ci_flags, argc) }
}
_ => unreachable!(),
}
@@ -8016,7 +9942,6 @@ fn gen_invokesuper_specialized(
fn gen_leave(
_jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Only the return value should be on the stack
assert_eq!(1, asm.ctx.get_stack_size(), "leave instruction expects stack size 1, but was: {}", asm.ctx.get_stack_size());
@@ -8029,7 +9954,7 @@ fn gen_leave(
asm_comment!(asm, "pop stack frame");
let incr_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into());
asm.mov(CFP, incr_cfp);
- asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP);
+ asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP);
// Load the return value
let retval_opnd = asm.stack_pop(1);
@@ -8053,12 +9978,11 @@ fn gen_leave(
fn gen_getglobal(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let gid = jit.get_arg(0).as_usize();
// Save the PC and SP because we might make a Ruby call for warning
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
let val_opnd = asm.ccall(
rb_gvar_get as *const u8,
@@ -8074,13 +9998,12 @@ fn gen_getglobal(
fn gen_setglobal(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let gid = jit.get_arg(0).as_usize();
// Save the PC and SP because we might make a Ruby call for
// Kernel#set_trace_var
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
let val = asm.stack_opnd(0);
asm.ccall(
@@ -8098,10 +10021,9 @@ fn gen_setglobal(
fn gen_anytostring(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Save the PC and SP since we might call #to_s
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
let str = asm.stack_opnd(0);
let val = asm.stack_opnd(1);
@@ -8119,11 +10041,9 @@ fn gen_anytostring(
fn gen_objtostring(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
- if !jit.at_current_insn() {
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ if !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
}
let recv = asm.stack_opnd(0);
@@ -8133,8 +10053,6 @@ fn gen_objtostring(
jit_guard_known_klass(
jit,
asm,
- ocb,
- comptime_recv.class_of(),
recv,
recv.into(),
comptime_recv,
@@ -8144,19 +10062,46 @@ fn gen_objtostring(
// No work needed. The string value is already on the top of the stack.
Some(KeepCompiling)
+ } else if unsafe { RB_TYPE_P(comptime_recv, RUBY_T_SYMBOL) } && assume_method_basic_definition(jit, asm, comptime_recv.class_of(), ID!(to_s)) {
+ jit_guard_known_klass(
+ jit,
+ asm,
+ recv,
+ recv.into(),
+ comptime_recv,
+ SEND_MAX_DEPTH,
+ Counter::objtostring_not_string,
+ );
+
+ extern "C" {
+ fn rb_sym2str(sym: VALUE) -> VALUE;
+ }
+
+        // Same optimization as in the interpreter: rb_sym_to_s() allocates a mutable
+        // string, but since we only use this string for interpolation, it's fine to
+        // use the frozen string. rb_sym2str does not allocate.
+ let sym = recv;
+ let str = asm.ccall(rb_sym2str as *const u8, vec![sym]);
+ asm.stack_pop(1);
+
+ // Push the return value
+ let stack_ret = asm.stack_push(Type::TString);
+ asm.mov(stack_ret, str);
+
+ Some(KeepCompiling)
} else {
let cd = jit.get_arg(0).as_ptr();
- gen_send_general(jit, asm, ocb, cd, None)
+ perf_call! { gen_send_general(jit, asm, cd, None) }
}
}
fn gen_intern(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// Save the PC and SP because we might allocate
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_call_with_gc(jit, asm);
let str = asm.stack_opnd(0);
let sym = asm.ccall(rb_str_intern as *const u8, vec![str]);
@@ -8172,16 +10117,15 @@ fn gen_intern(
fn gen_toregexp(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let opt = jit.get_arg(0).as_i64();
let cnt = jit.get_arg(1).as_usize();
// Save the PC and SP because this allocates an object and could
// raise an exception.
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
- let values_ptr = asm.lea(asm.ctx.sp_opnd(-((SIZEOF_VALUE as isize) * (cnt as isize))));
+ let values_ptr = asm.lea(asm.ctx.sp_opnd(-(cnt as i32)));
let ary = asm.ccall(
rb_ary_tmp_new_from_values as *const u8,
@@ -8223,7 +10167,6 @@ fn gen_toregexp(
fn gen_getspecial(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// This takes two arguments, key and type
// key is only used when type == 0
@@ -8238,7 +10181,7 @@ fn gen_getspecial(
// Fetch a "special" backref based on a char encoded by shifting by 1
// Can raise if matchdata uninitialized
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
// call rb_backref_get()
asm_comment!(asm, "rb_backref_get");
@@ -8273,7 +10216,7 @@ fn gen_getspecial(
// Fetch the N-th match from the last backref based on type shifted by 1
// Can raise if matchdata uninitialized
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
// call rb_backref_get()
asm_comment!(asm, "rb_backref_get");
@@ -8299,15 +10242,14 @@ fn gen_getspecial(
fn gen_getclassvariable(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// rb_vm_getclassvariable can raise exceptions.
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
let val_opnd = asm.ccall(
rb_vm_getclassvariable as *const u8,
vec![
- Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ),
+ VALUE(jit.iseq as usize).into(),
CFP,
Opnd::UImm(jit.get_arg(0).as_u64()),
Opnd::UImm(jit.get_arg(1).as_u64()),
@@ -8323,16 +10265,15 @@ fn gen_getclassvariable(
fn gen_setclassvariable(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// rb_vm_setclassvariable can raise exceptions.
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
let val = asm.stack_opnd(0);
asm.ccall(
rb_vm_setclassvariable as *const u8,
vec![
- Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ),
+ VALUE(jit.iseq as usize).into(),
CFP,
Opnd::UImm(jit.get_arg(0).as_u64()),
val,
@@ -8347,13 +10288,12 @@ fn gen_setclassvariable(
fn gen_getconstant(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let id = jit.get_arg(0).as_usize();
// vm_get_ev_const can raise exceptions.
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
let allow_nil_opnd = asm.stack_opnd(0);
let klass_opnd = asm.stack_opnd(1);
@@ -8382,7 +10322,6 @@ fn gen_getconstant(
fn gen_opt_getconstant_path(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let const_cache_as_value = jit.get_arg(0);
let ic: *const iseq_inline_constant_cache = const_cache_as_value.as_ptr();
@@ -8390,14 +10329,14 @@ fn gen_opt_getconstant_path(
// Make sure there is an exit for this block as the interpreter might want
// to invalidate this block from yjit_constant_ic_update().
- jit_ensure_block_entry_exit(jit, asm, ocb)?;
+ jit_ensure_block_entry_exit(jit, asm)?;
// See vm_ic_hit_p(). The same conditions are checked in yjit_constant_ic_update().
// If a cache is not filled, fallback to the general C call.
let ice = unsafe { (*ic).entry };
if ice.is_null() {
// Prepare for const_missing
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
// If this does not trigger const_missing, vm_ic_update will invalidate this block.
extern "C" {
@@ -8411,15 +10350,19 @@ fn gen_opt_getconstant_path(
let stack_top = asm.stack_push(Type::Unknown);
asm.store(stack_top, val);
- jump_to_next_insn(jit, asm, ocb);
- return Some(EndBlock);
+ return jump_to_next_insn(jit, asm);
}
- if !unsafe { (*ice).ic_cref }.is_null() {
+ let cref_sensitive = !unsafe { (*ice).ic_cref }.is_null();
+ let is_shareable = unsafe { rb_yjit_constcache_shareable(ice) };
+ let needs_checks = cref_sensitive || (!is_shareable && !assume_single_ractor_mode(jit, asm));
+
+ if needs_checks {
// Cache is keyed on a certain lexical scope. Use the interpreter's cache.
let inline_cache = asm.load(Opnd::const_ptr(ic as *const u8));
// Call function to verify the cache. It doesn't allocate or call methods.
+ // This includes a check for Ractor safety
let ret_val = asm.ccall(
rb_vm_ic_hit_p as *const u8,
vec![inline_cache, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)]
@@ -8448,21 +10391,14 @@ fn gen_opt_getconstant_path(
let stack_top = asm.stack_push(Type::Unknown);
asm.store(stack_top, ic_entry_val);
} else {
- // Optimize for single ractor mode.
- if !assume_single_ractor_mode(jit, asm, ocb) {
- gen_counter_incr(asm, Counter::opt_getconstant_path_multi_ractor);
- return None;
- }
-
// Invalidate output code on any constant writes associated with
// constants referenced within the current block.
- jit.assume_stable_constant_names(asm, ocb, idlist);
+ jit.assume_stable_constant_names(asm, idlist);
jit_putobject(asm, unsafe { (*ice).value });
}
- jump_to_next_insn(jit, asm, ocb);
- Some(EndBlock)
+ jump_to_next_insn(jit, asm)
}
// Push the explicit block parameter onto the temporary stack. Part of the
@@ -8471,11 +10407,9 @@ fn gen_opt_getconstant_path(
fn gen_getblockparamproxy(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
- if !jit.at_current_insn() {
- defer_compilation(jit, asm, ocb);
- return Some(EndBlock);
+ if !jit.at_compile_target() {
+ return jit.defer_compilation(asm);
}
// EP level
@@ -8491,7 +10425,7 @@ fn gen_getblockparamproxy(
unsafe { rb_obj_is_proc(comptime_handler) }.test() // block is a Proc
) {
// Missing the symbol case, where we basically need to call Symbol#to_proc at runtime
- gen_counter_incr(asm, Counter::gbpp_unsupported_type);
+ gen_counter_incr(jit, asm, Counter::gbpp_unsupported_type);
return None;
}
@@ -8527,7 +10461,6 @@ fn gen_getblockparamproxy(
JCC_JNZ,
jit,
asm,
- ocb,
SEND_MAX_DEPTH,
Counter::gbpp_block_handler_not_none,
);
@@ -8547,7 +10480,6 @@ fn gen_getblockparamproxy(
JCC_JZ,
jit,
asm,
- ocb,
SEND_MAX_DEPTH,
Counter::gbpp_block_handler_not_iseq,
);
@@ -8572,7 +10504,7 @@ fn gen_getblockparamproxy(
}
}
- // Simple predicate, no need to jit_prepare_routine_call()
+ // Simple predicate, no need to jit_prepare_non_leaf_call()
let proc_or_false = asm.ccall(is_proc as _, vec![block_handler]);
// Guard for proc
@@ -8581,7 +10513,6 @@ fn gen_getblockparamproxy(
JCC_JE,
jit,
asm,
- ocb,
SEND_MAX_DEPTH,
Counter::gbpp_block_handler_not_proc,
);
@@ -8592,22 +10523,19 @@ fn gen_getblockparamproxy(
unreachable!("absurd given initial filtering");
}
- jump_to_next_insn(jit, asm, ocb);
-
- Some(EndBlock)
+ jump_to_next_insn(jit, asm)
}
fn gen_getblockparam(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
// EP level
let level = jit.get_arg(1).as_u32();
// Save the PC and SP because we might allocate
- jit_prepare_routine_call(jit, asm);
- asm.spill_temps(); // For ccall. Unconditionally spill them for RegTemps consistency.
+ jit_prepare_call_with_gc(jit, asm);
+ asm.spill_regs(); // For ccall. Unconditionally spill them for RegMappings consistency.
// A mirror of the interpreter code. Checking for the case
// where it's pushing rb_block_param_proxy.
@@ -8682,18 +10610,18 @@ fn gen_getblockparam(
fn gen_invokebuiltin(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let bf: *const rb_builtin_function = jit.get_arg(0).as_ptr();
let bf_argc: usize = unsafe { (*bf).argc }.try_into().expect("non negative argc");
// ec, self, and arguments
if bf_argc + 2 > C_ARG_OPNDS.len() {
+ incr_counter!(invokebuiltin_too_many_args);
return None;
}
// If the calls don't allocate, do they need up to date PC, SP?
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
// Call the builtin func (ec, recv, arg1, arg2, ...)
let mut args = vec![EC, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)];
@@ -8720,7 +10648,6 @@ fn gen_invokebuiltin(
fn gen_opt_invokebuiltin_delegate(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let bf: *const rb_builtin_function = jit.get_arg(0).as_ptr();
let bf_argc = unsafe { (*bf).argc };
@@ -8728,11 +10655,12 @@ fn gen_opt_invokebuiltin_delegate(
// ec, self, and arguments
if bf_argc + 2 > (C_ARG_OPNDS.len() as i32) {
+ incr_counter!(invokebuiltin_too_many_args);
return None;
}
// If the calls don't allocate, do they need up to date PC, SP?
- jit_prepare_routine_call(jit, asm);
+ jit_prepare_non_leaf_call(jit, asm);
// Call the builtin func (ec, recv, arg1, arg2, ...)
let mut args = vec![EC, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)];
@@ -8770,6 +10698,7 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> {
YARVINSN_dup => Some(gen_dup),
YARVINSN_dupn => Some(gen_dupn),
YARVINSN_swap => Some(gen_swap),
+ YARVINSN_opt_reverse => Some(gen_opt_reverse),
YARVINSN_putnil => Some(gen_putnil),
YARVINSN_putobject => Some(gen_putobject),
YARVINSN_putobject_INT2FIX_0_ => Some(gen_putobject_int2fix),
@@ -8800,13 +10729,20 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> {
YARVINSN_opt_gt => Some(gen_opt_gt),
YARVINSN_opt_ge => Some(gen_opt_ge),
YARVINSN_opt_mod => Some(gen_opt_mod),
+ YARVINSN_opt_ary_freeze => Some(gen_opt_ary_freeze),
+ YARVINSN_opt_hash_freeze => Some(gen_opt_hash_freeze),
YARVINSN_opt_str_freeze => Some(gen_opt_str_freeze),
YARVINSN_opt_str_uminus => Some(gen_opt_str_uminus),
+ YARVINSN_opt_duparray_send => Some(gen_opt_duparray_send),
YARVINSN_opt_newarray_send => Some(gen_opt_newarray_send),
YARVINSN_splatarray => Some(gen_splatarray),
+ YARVINSN_splatkw => Some(gen_splatkw),
YARVINSN_concatarray => Some(gen_concatarray),
+ YARVINSN_concattoarray => Some(gen_concattoarray),
+ YARVINSN_pushtoarray => Some(gen_pushtoarray),
YARVINSN_newrange => Some(gen_newrange),
YARVINSN_putstring => Some(gen_putstring),
+ YARVINSN_putchilledstring => Some(gen_putchilledstring),
YARVINSN_expandarray => Some(gen_expandarray),
YARVINSN_defined => Some(gen_defined),
YARVINSN_definedivar => Some(gen_definedivar),
@@ -8820,7 +10756,6 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> {
YARVINSN_opt_neq => Some(gen_opt_neq),
YARVINSN_opt_aref => Some(gen_opt_aref),
YARVINSN_opt_aset => Some(gen_opt_aset),
- YARVINSN_opt_aref_with => Some(gen_opt_aref_with),
YARVINSN_opt_mult => Some(gen_opt_mult),
YARVINSN_opt_div => Some(gen_opt_div),
YARVINSN_opt_ltlt => Some(gen_opt_ltlt),
@@ -8842,13 +10777,16 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> {
YARVINSN_branchnil => Some(gen_branchnil),
YARVINSN_throw => Some(gen_throw),
YARVINSN_jump => Some(gen_jump),
+ YARVINSN_opt_new => Some(gen_opt_new),
YARVINSN_getblockparamproxy => Some(gen_getblockparamproxy),
YARVINSN_getblockparam => Some(gen_getblockparam),
YARVINSN_opt_send_without_block => Some(gen_opt_send_without_block),
YARVINSN_send => Some(gen_send),
+ YARVINSN_sendforward => Some(gen_sendforward),
YARVINSN_invokeblock => Some(gen_invokeblock),
YARVINSN_invokesuper => Some(gen_invokesuper),
+ YARVINSN_invokesuperforward => Some(gen_invokesuperforward),
YARVINSN_leave => Some(gen_leave),
YARVINSN_getglobal => Some(gen_getglobal),
@@ -8866,18 +10804,17 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> {
}
}
-// Return true when the codegen function generates code.
-// known_recv_klass is non-NULL when the caller has used jit_guard_known_klass().
-// See yjit_reg_method().
+/// Return true when the codegen function generates code.
+/// known_recv_class is Some when the caller has used jit_guard_known_klass().
+/// See [reg_method_codegen].
type MethodGenFn = fn(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
ci: *const rb_callinfo,
cme: *const rb_callable_method_entry_t,
block: Option<BlockHandler>,
argc: i32,
- known_recv_class: *const VALUE,
+ known_recv_class: Option<VALUE>,
) -> bool;
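
// Illustrative sketch (not part of the patch): a minimal MethodGenFn under the new
// signature, modeled on the existing jit_rb_true helper. The function name is
// hypothetical; it assumes the JITState/Assembler APIs used elsewhere in this file
// (stack_pop, stack_push, mov) and only substitutes the call when it can do so
// without allocating or raising.
fn jit_rb_always_true(
    _jit: &mut JITState,
    asm: &mut Assembler,
    _ci: *const rb_callinfo,
    _cme: *const rb_callable_method_entry_t,
    _block: Option<BlockHandler>,
    argc: i32,
    _known_recv_class: Option<VALUE>,
) -> bool {
    if argc != 0 {
        return false; // bail out and let the generic send path handle it
    }
    asm.stack_pop(1); // drop the receiver
    let out = asm.stack_push(Type::True);
    asm.mov(out, Qtrue.into());
    true // the call is fully replaced, so no control frame is pushed
}
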
/// Methods for generating code for hardcoded (usually C) methods
@@ -8889,70 +10826,94 @@ pub fn yjit_reg_method_codegen_fns() {
assert!(METHOD_CODEGEN_TABLE.is_none());
METHOD_CODEGEN_TABLE = Some(HashMap::default());
- // Specialization for C methods. See yjit_reg_method() for details.
- yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not);
-
- yjit_reg_method(rb_cNilClass, "nil?", jit_rb_true);
- yjit_reg_method(rb_mKernel, "nil?", jit_rb_false);
- yjit_reg_method(rb_mKernel, "is_a?", jit_rb_kernel_is_a);
- yjit_reg_method(rb_mKernel, "kind_of?", jit_rb_kernel_is_a);
- yjit_reg_method(rb_mKernel, "instance_of?", jit_rb_kernel_instance_of);
-
- yjit_reg_method(rb_cBasicObject, "==", jit_rb_obj_equal);
- yjit_reg_method(rb_cBasicObject, "equal?", jit_rb_obj_equal);
- yjit_reg_method(rb_cBasicObject, "!=", jit_rb_obj_not_equal);
- yjit_reg_method(rb_mKernel, "eql?", jit_rb_obj_equal);
- yjit_reg_method(rb_cModule, "==", jit_rb_obj_equal);
- yjit_reg_method(rb_cModule, "===", jit_rb_mod_eqq);
- yjit_reg_method(rb_cSymbol, "==", jit_rb_obj_equal);
- yjit_reg_method(rb_cSymbol, "===", jit_rb_obj_equal);
- yjit_reg_method(rb_cInteger, "==", jit_rb_int_equal);
- yjit_reg_method(rb_cInteger, "===", jit_rb_int_equal);
-
- yjit_reg_method(rb_cInteger, "succ", jit_rb_int_succ);
- yjit_reg_method(rb_cInteger, "/", jit_rb_int_div);
- yjit_reg_method(rb_cInteger, "<<", jit_rb_int_lshift);
- yjit_reg_method(rb_cInteger, "[]", jit_rb_int_aref);
-
- yjit_reg_method(rb_cString, "empty?", jit_rb_str_empty_p);
- yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s);
- yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s);
- yjit_reg_method(rb_cString, "length", jit_rb_str_length);
- yjit_reg_method(rb_cString, "size", jit_rb_str_length);
- yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize);
- yjit_reg_method(rb_cString, "getbyte", jit_rb_str_getbyte);
- yjit_reg_method(rb_cString, "<<", jit_rb_str_concat);
- yjit_reg_method(rb_cString, "+@", jit_rb_str_uplus);
-
- yjit_reg_method(rb_cArray, "empty?", jit_rb_ary_empty_p);
- yjit_reg_method(rb_cArray, "length", jit_rb_ary_length);
- yjit_reg_method(rb_cArray, "size", jit_rb_ary_length);
- yjit_reg_method(rb_cArray, "<<", jit_rb_ary_push);
-
- yjit_reg_method(rb_mKernel, "respond_to?", jit_obj_respond_to);
- yjit_reg_method(rb_mKernel, "block_given?", jit_rb_f_block_given_p);
-
- yjit_reg_method(rb_singleton_class(rb_cThread), "current", jit_thread_s_current);
- }
-}
-
-// Register a specialized codegen function for a particular method. Note that
-// the if the function returns true, the code it generates runs without a
-// control frame and without interrupt checks. To avoid creating observable
-// behavior changes, the codegen function should only target simple code paths
-// that do not allocate and do not make method calls.
-fn yjit_reg_method(klass: VALUE, mid_str: &str, gen_fn: MethodGenFn) {
- let id_string = std::ffi::CString::new(mid_str).expect("couldn't convert to CString!");
- let mid = unsafe { rb_intern(id_string.as_ptr()) };
+ // Specialization for C methods. See the function's docs for details.
+ reg_method_codegen(rb_cBasicObject, "!", jit_rb_obj_not);
+
+ reg_method_codegen(rb_cNilClass, "nil?", jit_rb_true);
+ reg_method_codegen(rb_mKernel, "nil?", jit_rb_false);
+ reg_method_codegen(rb_mKernel, "is_a?", jit_rb_kernel_is_a);
+ reg_method_codegen(rb_mKernel, "kind_of?", jit_rb_kernel_is_a);
+ reg_method_codegen(rb_mKernel, "instance_of?", jit_rb_kernel_instance_of);
+
+ reg_method_codegen(rb_cBasicObject, "==", jit_rb_obj_equal);
+ reg_method_codegen(rb_cBasicObject, "equal?", jit_rb_obj_equal);
+ reg_method_codegen(rb_cBasicObject, "!=", jit_rb_obj_not_equal);
+ reg_method_codegen(rb_mKernel, "eql?", jit_rb_obj_equal);
+ reg_method_codegen(rb_cModule, "==", jit_rb_obj_equal);
+ reg_method_codegen(rb_cModule, "===", jit_rb_mod_eqq);
+ reg_method_codegen(rb_cModule, "name", jit_rb_mod_name);
+ reg_method_codegen(rb_cSymbol, "==", jit_rb_obj_equal);
+ reg_method_codegen(rb_cSymbol, "===", jit_rb_obj_equal);
+ reg_method_codegen(rb_cInteger, "==", jit_rb_int_equal);
+ reg_method_codegen(rb_cInteger, "===", jit_rb_int_equal);
+
+ reg_method_codegen(rb_cInteger, "succ", jit_rb_int_succ);
+ reg_method_codegen(rb_cInteger, "pred", jit_rb_int_pred);
+ reg_method_codegen(rb_cInteger, "/", jit_rb_int_div);
+ reg_method_codegen(rb_cInteger, "<<", jit_rb_int_lshift);
+ reg_method_codegen(rb_cInteger, ">>", jit_rb_int_rshift);
+ reg_method_codegen(rb_cInteger, "^", jit_rb_int_xor);
+ reg_method_codegen(rb_cInteger, "[]", jit_rb_int_aref);
+
+ reg_method_codegen(rb_cFloat, "+", jit_rb_float_plus);
+ reg_method_codegen(rb_cFloat, "-", jit_rb_float_minus);
+ reg_method_codegen(rb_cFloat, "*", jit_rb_float_mul);
+ reg_method_codegen(rb_cFloat, "/", jit_rb_float_div);
+
+ reg_method_codegen(rb_cString, "dup", jit_rb_str_dup);
+ reg_method_codegen(rb_cString, "empty?", jit_rb_str_empty_p);
+ reg_method_codegen(rb_cString, "to_s", jit_rb_str_to_s);
+ reg_method_codegen(rb_cString, "to_str", jit_rb_str_to_s);
+ reg_method_codegen(rb_cString, "length", jit_rb_str_length);
+ reg_method_codegen(rb_cString, "size", jit_rb_str_length);
+ reg_method_codegen(rb_cString, "bytesize", jit_rb_str_bytesize);
+ reg_method_codegen(rb_cString, "getbyte", jit_rb_str_getbyte);
+ reg_method_codegen(rb_cString, "setbyte", jit_rb_str_setbyte);
+ reg_method_codegen(rb_cString, "byteslice", jit_rb_str_byteslice);
+ reg_method_codegen(rb_cString, "[]", jit_rb_str_aref_m);
+ reg_method_codegen(rb_cString, "slice", jit_rb_str_aref_m);
+ reg_method_codegen(rb_cString, "<<", jit_rb_str_concat);
+ reg_method_codegen(rb_cString, "+@", jit_rb_str_uplus);
+
+ reg_method_codegen(rb_cNilClass, "===", jit_rb_case_equal);
+ reg_method_codegen(rb_cTrueClass, "===", jit_rb_case_equal);
+ reg_method_codegen(rb_cFalseClass, "===", jit_rb_case_equal);
+
+ reg_method_codegen(rb_cArray, "empty?", jit_rb_ary_empty_p);
+ reg_method_codegen(rb_cArray, "length", jit_rb_ary_length);
+ reg_method_codegen(rb_cArray, "size", jit_rb_ary_length);
+ reg_method_codegen(rb_cArray, "<<", jit_rb_ary_push);
+
+ reg_method_codegen(rb_cHash, "empty?", jit_rb_hash_empty_p);
+
+ reg_method_codegen(rb_mKernel, "respond_to?", jit_obj_respond_to);
+ reg_method_codegen(rb_mKernel, "block_given?", jit_rb_f_block_given_p);
+ reg_method_codegen(rb_mKernel, "dup", jit_rb_obj_dup);
+
+ reg_method_codegen(rb_cClass, "superclass", jit_rb_class_superclass);
+
+ reg_method_codegen(rb_singleton_class(rb_cThread), "current", jit_thread_s_current);
+ }
+}
+
+/// Register a specialized codegen function for a particular method. Note that
+/// if the function returns true, the code it generates runs without a
+/// control frame and without interrupt checks, completely substituting the
+/// original implementation of the method. To avoid creating observable
+/// behavior changes, prefer targeting simple code paths that do not allocate
+/// and do not make method calls.
+///
+/// See also: [lookup_cfunc_codegen].
+fn reg_method_codegen(klass: VALUE, method_name: &str, gen_fn: MethodGenFn) {
+ let mid = unsafe { rb_intern2(method_name.as_ptr().cast(), method_name.len().try_into().unwrap()) };
let me = unsafe { rb_method_entry_at(klass, mid) };
if me.is_null() {
- panic!("undefined optimized method!: {mid_str}");
+ panic!("undefined optimized method!: {method_name}");
}
- // For now, only cfuncs are supported
- //RUBY_ASSERT(me && me->def);
- //RUBY_ASSERT(me->def->type == VM_METHOD_TYPE_CFUNC);
+ // For now, only cfuncs are supported (casting `me` to a cme is fine since we only read me->def->type).
+ debug_assert_eq!(VM_METHOD_TYPE_CFUNC, unsafe { get_cme_def_type(me.cast()) });
let method_serial = unsafe {
let def = (*me).def;
@@ -8962,8 +10923,15 @@ fn yjit_reg_method(klass: VALUE, mid_str: &str, gen_fn: MethodGenFn) {
unsafe { METHOD_CODEGEN_TABLE.as_mut().unwrap().insert(method_serial, gen_fn); }
}
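
// Illustrative sketch (not part of the patch): how the table filled in above is
// consulted at send sites. The helper name and the usize key type are assumptions
// for illustration; the real lookup is done by lookup_cfunc_codegen, referenced in
// the doc comment above, keyed on the method definition's serial.
fn lookup_codegen_example(method_serial: usize) -> Option<MethodGenFn> {
    unsafe { METHOD_CODEGEN_TABLE.as_ref()?.get(&method_serial).copied() }
}
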
+pub fn yjit_shutdown_free_codegen_table() {
+ unsafe { METHOD_CODEGEN_TABLE = None; };
+}
+
/// Global state needed for code generation
pub struct CodegenGlobals {
+ /// Flat vector of bits to store compressed context data
+ context_data: BitVector,
+
/// Inline code block (fast path)
inline_cb: CodeBlock,
@@ -8994,6 +10962,10 @@ pub struct CodegenGlobals {
/// Page indexes for outlined code that are not associated to any ISEQ.
ocb_pages: Vec<usize>,
+
+ /// Map of cfunc YARV PCs to CMEs and receiver indexes, used to lazily push
+ /// a frame when rb_yjit_lazy_push_frame() is called with a PC in this HashMap.
+ pc_to_cfunc: HashMap<*mut VALUE, (*const rb_callable_method_entry_t, u8)>,
}
/// For implementing global code invalidation. A position in the inline
@@ -9011,11 +10983,11 @@ impl CodegenGlobals {
/// Initialize the codegen globals
pub fn init() {
// Executable memory and code page size in bytes
- let mem_size = get_option!(exec_mem_size);
+ let exec_mem_size = get_option!(exec_mem_size).unwrap_or(get_option!(mem_size));
#[cfg(not(test))]
let (mut cb, mut ocb) = {
- let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size as u32) };
+ let virt_block: *mut u8 = unsafe { rb_jit_reserve_addr_space(exec_mem_size as u32) };
// Memory protection syscalls need page-aligned addresses, so check it here. Assuming
// `virt_block` is page-aligned, `second_half` should be page-aligned as long as the
@@ -9024,7 +10996,7 @@ impl CodegenGlobals {
//
// Basically, we don't support x86-64 2MiB and 1GiB pages. ARMv8 can do up to 64KiB
// (2¹⁶ bytes) pages, which should be fine. 4KiB pages seem to be the most popular though.
- let page_size = unsafe { rb_yjit_get_page_size() };
+ let page_size = unsafe { rb_jit_get_page_size() };
assert_eq!(
virt_block as usize % page_size.as_usize(), 0,
"Start of virtual address block should be page-aligned",
@@ -9037,13 +11009,16 @@ impl CodegenGlobals {
SystemAllocator {},
page_size,
NonNull::new(virt_block).unwrap(),
- mem_size,
+ exec_mem_size,
+ get_option!(mem_size),
);
- let mem_block = Rc::new(RefCell::new(mem_block));
+ let mem_block = Rc::new(mem_block);
let freed_pages = Rc::new(None);
- let cb = CodeBlock::new(mem_block.clone(), false, freed_pages.clone());
- let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, true, freed_pages));
+
+ let asm_comments = get_option_ref!(dump_disasm).is_some();
+ let cb = CodeBlock::new(mem_block.clone(), false, freed_pages.clone(), asm_comments);
+ let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, true, freed_pages, asm_comments));
(cb, ocb)
};
@@ -9051,9 +11026,9 @@ impl CodegenGlobals {
// In test mode we're not linking with the C code
// so we don't allocate executable memory
#[cfg(test)]
- let mut cb = CodeBlock::new_dummy(mem_size / 2);
+ let mut cb = CodeBlock::new_dummy(exec_mem_size / 2);
#[cfg(test)]
- let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(mem_size / 2));
+ let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(exec_mem_size / 2));
let ocb_start_addr = ocb.unwrap().get_write_ptr();
let leave_exit_code = gen_leave_exit(&mut ocb).unwrap();
@@ -9068,15 +11043,16 @@ impl CodegenGlobals {
let cfunc_exit_code = gen_full_cfunc_return(&mut ocb).unwrap();
let ocb_end_addr = ocb.unwrap().get_write_ptr();
- let ocb_pages = ocb.unwrap().addrs_to_pages(ocb_start_addr, ocb_end_addr);
+ let ocb_pages = ocb.unwrap().addrs_to_pages(ocb_start_addr, ocb_end_addr).collect();
// Mark all code memory as executable
cb.mark_all_executable();
- ocb.unwrap().mark_all_executable();
let codegen_globals = CodegenGlobals {
+ context_data: BitVector::new(),
inline_cb: cb,
outlined_cb: ocb,
+ ocb_pages,
leave_exit_code,
leave_exception_code,
stub_exit_code,
@@ -9084,7 +11060,7 @@ impl CodegenGlobals {
branch_stub_hit_trampoline,
entry_stub_hit_trampoline,
global_inval_patches: Vec::new(),
- ocb_pages,
+ pc_to_cfunc: HashMap::new(),
};
// Initialize the codegen globals instance
@@ -9102,6 +11078,11 @@ impl CodegenGlobals {
unsafe { CODEGEN_GLOBALS.as_mut().is_some() }
}
+ /// Get a mutable reference to the context data
+ pub fn get_context_data() -> &'static mut BitVector {
+ &mut CodegenGlobals::get_instance().context_data
+ }
+
/// Get a mutable reference to the inline code block
pub fn get_inline_cb() -> &'static mut CodeBlock {
&mut CodegenGlobals::get_instance().inline_cb
@@ -9163,29 +11144,38 @@ impl CodegenGlobals {
pub fn get_ocb_pages() -> &'static Vec<usize> {
&CodegenGlobals::get_instance().ocb_pages
}
+
+ pub fn get_pc_to_cfunc() -> &'static mut HashMap<*mut VALUE, (*const rb_callable_method_entry_t, u8)> {
+ &mut CodegenGlobals::get_instance().pc_to_cfunc
+ }
}
#[cfg(test)]
mod tests {
use super::*;
- fn setup_codegen() -> (JITState, Context, Assembler, CodeBlock, OutlinedCb) {
+ fn setup_codegen() -> (Context, Assembler, CodeBlock, OutlinedCb) {
let cb = CodeBlock::new_dummy(256 * 1024);
return (
- JITState::new(
- BlockId { iseq: std::ptr::null(), idx: 0 },
- Context::default(),
- cb.get_write_ptr(),
- ptr::null(), // No execution context in tests. No peeking!
- ),
Context::default(),
- Assembler::new(),
+ Assembler::new(0),
cb,
OutlinedCb::wrap(CodeBlock::new_dummy(256 * 1024)),
);
}
+ fn dummy_jit_state<'a>(cb: &mut CodeBlock, ocb: &'a mut OutlinedCb) -> JITState<'a> {
+ JITState::new(
+ BlockId { iseq: std::ptr::null(), idx: 0 },
+ Context::default(),
+ cb.get_write_ptr(),
+ ptr::null(), // No execution context in tests. No peeking!
+ ocb,
+ true,
+ )
+ }
+
#[test]
fn test_gen_leave_exit() {
let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(256 * 1024));
@@ -9195,7 +11185,7 @@ mod tests {
#[test]
fn test_gen_exit() {
- let (_, _ctx, mut asm, mut cb, _) = setup_codegen();
+ let (_ctx, mut asm, mut cb, _) = setup_codegen();
gen_exit(0 as *mut VALUE, &mut asm);
asm.compile(&mut cb, None).unwrap();
assert!(cb.get_write_pos() > 0);
@@ -9203,7 +11193,7 @@ mod tests {
#[test]
fn test_get_side_exit() {
- let (_jit, ctx, mut asm, _, mut ocb) = setup_codegen();
+ let (ctx, mut asm, _, mut ocb) = setup_codegen();
let side_exit_context = SideExitContext::new(0 as _, ctx);
asm.get_side_exit(&side_exit_context, None, &mut ocb);
assert!(ocb.unwrap().get_write_pos() > 0);
@@ -9211,15 +11201,16 @@ mod tests {
#[test]
fn test_gen_check_ints() {
- let (_jit, _ctx, mut asm, _cb, _ocb) = setup_codegen();
+ let (_ctx, mut asm, _cb, _ocb) = setup_codegen();
asm.set_side_exit_context(0 as _, 0);
gen_check_ints(&mut asm, Counter::guard_send_interrupted);
}
#[test]
fn test_gen_nop() {
- let (mut jit, context, mut asm, mut cb, mut ocb) = setup_codegen();
- let status = gen_nop(&mut jit, &mut asm, &mut ocb);
+ let (context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let mut jit = dummy_jit_state(&mut cb, &mut ocb);
+ let status = gen_nop(&mut jit, &mut asm);
asm.compile(&mut cb, None).unwrap();
assert_eq!(status, Some(KeepCompiling));
@@ -9229,22 +11220,24 @@ mod tests {
#[test]
fn test_gen_pop() {
- let (mut jit, _, mut asm, _cb, mut ocb) = setup_codegen();
+ let (_, mut asm, mut cb, mut ocb) = setup_codegen();
+ let mut jit = dummy_jit_state(&mut cb, &mut ocb);
let context = Context::default();
asm.stack_push(Type::Fixnum);
- let status = gen_pop(&mut jit, &mut asm, &mut ocb);
+ let status = gen_pop(&mut jit, &mut asm);
assert_eq!(status, Some(KeepCompiling));
let mut default = Context::default();
- default.set_reg_temps(context.get_reg_temps());
+ default.set_reg_mapping(context.get_reg_mapping());
assert_eq!(context.diff(&default), TypeDiff::Compatible(0));
}
#[test]
fn test_gen_dup() {
- let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let (_context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let mut jit = dummy_jit_state(&mut cb, &mut ocb);
asm.stack_push(Type::Fixnum);
- let status = gen_dup(&mut jit, &mut asm, &mut ocb);
+ let status = gen_dup(&mut jit, &mut asm);
assert_eq!(status, Some(KeepCompiling));
@@ -9258,7 +11251,8 @@ mod tests {
#[test]
fn test_gen_dupn() {
- let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let (_context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let mut jit = dummy_jit_state(&mut cb, &mut ocb);
asm.stack_push(Type::Fixnum);
asm.stack_push(Type::Flonum);
@@ -9266,7 +11260,7 @@ mod tests {
let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
jit.pc = pc;
- let status = gen_dupn(&mut jit, &mut asm, &mut ocb);
+ let status = gen_dupn(&mut jit, &mut asm);
assert_eq!(status, Some(KeepCompiling));
@@ -9281,90 +11275,77 @@ mod tests {
}
#[test]
- fn test_gen_swap() {
- let (mut jit, _context, mut asm, _cb, mut ocb) = setup_codegen();
+ fn test_gen_opt_reverse() {
+ let (_context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let mut jit = dummy_jit_state(&mut cb, &mut ocb);
+
+ // Odd number of elements
asm.stack_push(Type::Fixnum);
asm.stack_push(Type::Flonum);
+ asm.stack_push(Type::CString);
- let status = gen_swap(&mut jit, &mut asm, &mut ocb);
+ let mut value_array: [u64; 2] = [0, 3];
+ let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
+ jit.pc = pc;
- let tmp_type_top = asm.ctx.get_opnd_type(StackOpnd(0));
- let tmp_type_next = asm.ctx.get_opnd_type(StackOpnd(1));
+ let mut status = gen_opt_reverse(&mut jit, &mut asm);
assert_eq!(status, Some(KeepCompiling));
- assert_eq!(tmp_type_top, Type::Fixnum);
- assert_eq!(tmp_type_next, Type::Flonum);
- }
- #[test]
- fn test_putnil() {
- let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen();
- let status = gen_putnil(&mut jit, &mut asm, &mut ocb);
+ assert_eq!(Type::CString, asm.ctx.get_opnd_type(StackOpnd(2)));
+ assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(1)));
+ assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(0)));
- let tmp_type_top = asm.ctx.get_opnd_type(StackOpnd(0));
+ // Try again with an even number of elements.
+ asm.stack_push(Type::Nil);
+ value_array[1] = 4;
+ status = gen_opt_reverse(&mut jit, &mut asm);
assert_eq!(status, Some(KeepCompiling));
- assert_eq!(tmp_type_top, Type::Nil);
- asm.compile(&mut cb, None).unwrap();
- assert!(cb.get_write_pos() > 0);
+
+ assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(3)));
+ assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(2)));
+ assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(1)));
+ assert_eq!(Type::CString, asm.ctx.get_opnd_type(StackOpnd(0)));
}
#[test]
- fn test_putobject_qtrue() {
- // Test gen_putobject with Qtrue
- let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen();
-
- let mut value_array: [u64; 2] = [0, Qtrue.into()];
- let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
- jit.pc = pc;
+ fn test_gen_swap() {
+ let (_context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let mut jit = dummy_jit_state(&mut cb, &mut ocb);
+ asm.stack_push(Type::Fixnum);
+ asm.stack_push(Type::Flonum);
- let status = gen_putobject(&mut jit, &mut asm, &mut ocb);
+ let status = gen_swap(&mut jit, &mut asm);
let tmp_type_top = asm.ctx.get_opnd_type(StackOpnd(0));
+ let tmp_type_next = asm.ctx.get_opnd_type(StackOpnd(1));
assert_eq!(status, Some(KeepCompiling));
- assert_eq!(tmp_type_top, Type::True);
- asm.compile(&mut cb, None).unwrap();
- assert!(cb.get_write_pos() > 0);
+ assert_eq!(tmp_type_top, Type::Fixnum);
+ assert_eq!(tmp_type_next, Type::Flonum);
}
#[test]
- fn test_putobject_fixnum() {
- // Test gen_putobject with a Fixnum to test another conditional branch
- let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen();
-
- // The Fixnum 7 is encoded as 7 * 2 + 1, or 15
- let mut value_array: [u64; 2] = [0, 15];
- let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
- jit.pc = pc;
-
- let status = gen_putobject(&mut jit, &mut asm, &mut ocb);
+ fn test_putnil() {
+ let (_context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let mut jit = dummy_jit_state(&mut cb, &mut ocb);
+ let status = gen_putnil(&mut jit, &mut asm);
let tmp_type_top = asm.ctx.get_opnd_type(StackOpnd(0));
assert_eq!(status, Some(KeepCompiling));
- assert_eq!(tmp_type_top, Type::Fixnum);
+ assert_eq!(tmp_type_top, Type::Nil);
asm.compile(&mut cb, None).unwrap();
assert!(cb.get_write_pos() > 0);
}
- #[test]
- fn test_int2fix() {
- let (mut jit, _context, mut asm, _cb, mut ocb) = setup_codegen();
- jit.opcode = YARVINSN_putobject_INT2FIX_0_.as_usize();
- let status = gen_putobject_int2fix(&mut jit, &mut asm, &mut ocb);
-
- let tmp_type_top = asm.ctx.get_opnd_type(StackOpnd(0));
-
- // Right now we're not testing the generated machine code to make sure a literal 1 or 0 was pushed. I've checked locally.
- assert_eq!(status, Some(KeepCompiling));
- assert_eq!(tmp_type_top, Type::Fixnum);
- }
#[test]
fn test_putself() {
- let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen();
- let status = gen_putself(&mut jit, &mut asm, &mut ocb);
+ let (_context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let mut jit = dummy_jit_state(&mut cb, &mut ocb);
+ let status = gen_putself(&mut jit, &mut asm);
assert_eq!(status, Some(KeepCompiling));
asm.compile(&mut cb, None).unwrap();
@@ -9373,7 +11354,8 @@ mod tests {
#[test]
fn test_gen_setn() {
- let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let (_context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let mut jit = dummy_jit_state(&mut cb, &mut ocb);
asm.stack_push(Type::Fixnum);
asm.stack_push(Type::Flonum);
asm.stack_push(Type::CString);
@@ -9382,7 +11364,7 @@ mod tests {
let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
jit.pc = pc;
- let status = gen_setn(&mut jit, &mut asm, &mut ocb);
+ let status = gen_setn(&mut jit, &mut asm);
assert_eq!(status, Some(KeepCompiling));
@@ -9396,7 +11378,8 @@ mod tests {
#[test]
fn test_gen_topn() {
- let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let (_context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let mut jit = dummy_jit_state(&mut cb, &mut ocb);
asm.stack_push(Type::Flonum);
asm.stack_push(Type::CString);
@@ -9404,7 +11387,7 @@ mod tests {
let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
jit.pc = pc;
- let status = gen_topn(&mut jit, &mut asm, &mut ocb);
+ let status = gen_topn(&mut jit, &mut asm);
assert_eq!(status, Some(KeepCompiling));
@@ -9418,7 +11401,8 @@ mod tests {
#[test]
fn test_gen_adjuststack() {
- let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let (_context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let mut jit = dummy_jit_state(&mut cb, &mut ocb);
asm.stack_push(Type::Flonum);
asm.stack_push(Type::CString);
asm.stack_push(Type::Fixnum);
@@ -9427,7 +11411,7 @@ mod tests {
let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
jit.pc = pc;
- let status = gen_adjuststack(&mut jit, &mut asm, &mut ocb);
+ let status = gen_adjuststack(&mut jit, &mut asm);
assert_eq!(status, Some(KeepCompiling));
@@ -9439,10 +11423,11 @@ mod tests {
#[test]
fn test_gen_leave() {
- let (mut jit, _context, mut asm, _cb, mut ocb) = setup_codegen();
+ let (_context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let mut jit = dummy_jit_state(&mut cb, &mut ocb);
// Push return value
asm.stack_push(Type::Fixnum);
asm.set_side_exit_context(0 as _, 0);
- gen_leave(&mut jit, &mut asm, &mut ocb);
+ gen_leave(&mut jit, &mut asm);
}
}
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
index 064a7b5e8f..0590135392 100644
--- a/yjit/src/core.rs
+++ b/yjit/src/core.rs
@@ -15,24 +15,26 @@ use crate::utils::*;
use crate::disasm::*;
use core::ffi::c_void;
use std::cell::*;
-use std::collections::HashSet;
use std::fmt;
use std::mem;
use std::mem::transmute;
use std::ops::Range;
use std::rc::Rc;
+use std::collections::HashSet;
+use std::collections::hash_map::DefaultHasher;
+use std::hash::{Hash, Hasher};
use mem::MaybeUninit;
use std::ptr;
use ptr::NonNull;
use YARVOpnd::*;
-use TempMappingKind::*;
+use TempMapping::*;
use crate::invariants::*;
-// Maximum number of temp value types we keep track of
-pub const MAX_TEMP_TYPES: usize = 8;
+// Maximum number of temp value types or registers we keep track of
+pub const MAX_CTX_TEMPS: usize = 8;
-// Maximum number of local variable types we keep track of
-const MAX_LOCAL_TYPES: usize = 8;
+// Maximum number of local variable types or registers we keep track of
+const MAX_CTX_LOCALS: usize = 8;
/// An index into `ISEQ_BODY(iseq)->iseq_encoded`. Points
/// to a YARV instruction or an instruction operand.
@@ -50,24 +52,20 @@ pub enum Type {
False,
Fixnum,
Flonum,
- Hash,
ImmSymbol,
- #[allow(unused)]
- HeapSymbol,
-
TString, // An object with the T_STRING flag set, possibly an rb_cString
- CString, // An un-subclassed string of type rb_cString (can have instance vars in some cases)
+ CString, // An object that at one point had its class field equal to rb_cString (creating a singleton class changes it)
TArray, // An object with the T_ARRAY flag set, possibly an rb_cArray
-
- TProc, // A proc object. Could be an instance of a subclass of ::rb_cProc
+ CArray, // An object that at one point had its class field equal to rb_cArray (creating a singleton class changes it)
+ THash, // An object with the T_HASH flag set, possibly an rb_cHash
+ CHash, // An object that at one point had its class field equal to rb_cHash (creating a singleton class changes it)
BlockParamProxy, // A special sentinel value indicating the block parameter should be read from
// the current surrounding cfp
// The context currently relies on types taking at most 4 bits (max value 15)
- // to encode, so if we add any more, we will need to refactor the context,
- // or we could remove HeapSymbol, which is currently unused.
+ // to encode, so if we add any more, we will need to refactor the context.
}
// Default initialization
@@ -100,8 +98,11 @@ impl Type {
// Core.rs can't reference rb_cString because it's linked by Rust-only tests.
// But CString vs TString is only an optimisation and shouldn't affect correctness.
#[cfg(not(test))]
- if val.class_of() == unsafe { rb_cString } && val.is_frozen() {
- return Type::CString;
+ match val.class_of() {
+ class if class == unsafe { rb_cArray } => return Type::CArray,
+ class if class == unsafe { rb_cHash } => return Type::CHash,
+ class if class == unsafe { rb_cString } => return Type::CString,
+ _ => {}
}
// We likewise can't reference rb_block_param_proxy, but it's again an optimisation;
// we can just treat it as a normal Object.
@@ -111,10 +112,8 @@ impl Type {
}
match val.builtin_type() {
RUBY_T_ARRAY => Type::TArray,
- RUBY_T_HASH => Type::Hash,
+ RUBY_T_HASH => Type::THash,
RUBY_T_STRING => Type::TString,
- #[cfg(not(test))]
- RUBY_T_DATA if unsafe { rb_obj_is_proc(val).test() } => Type::TProc,
_ => Type::UnknownHeap,
}
}
@@ -154,28 +153,29 @@ impl Type {
match self {
Type::UnknownHeap => true,
Type::TArray => true,
- Type::Hash => true,
- Type::HeapSymbol => true,
+ Type::CArray => true,
+ Type::THash => true,
+ Type::CHash => true,
Type::TString => true,
Type::CString => true,
Type::BlockParamProxy => true,
- Type::TProc => true,
_ => false,
}
}
/// Check if it's a T_ARRAY object (both TArray and CArray are T_ARRAY)
pub fn is_array(&self) -> bool {
- matches!(self, Type::TArray)
+ matches!(self, Type::TArray | Type::CArray)
+ }
+
+ /// Check if it's a T_HASH object (both THash and CHash are T_HASH)
+ pub fn is_hash(&self) -> bool {
+ matches!(self, Type::THash | Type::CHash)
}
/// Check if it's a T_STRING object (both TString and CString are T_STRING)
pub fn is_string(&self) -> bool {
- match self {
- Type::TString => true,
- Type::CString => true,
- _ => false,
- }
+ matches!(self, Type::TString | Type::CString)
}
/// Returns an Option with the T_ value type if it is known, otherwise None
@@ -186,11 +186,10 @@ impl Type {
Type::False => Some(RUBY_T_FALSE),
Type::Fixnum => Some(RUBY_T_FIXNUM),
Type::Flonum => Some(RUBY_T_FLOAT),
- Type::TArray => Some(RUBY_T_ARRAY),
- Type::Hash => Some(RUBY_T_HASH),
- Type::ImmSymbol | Type::HeapSymbol => Some(RUBY_T_SYMBOL),
+ Type::TArray | Type::CArray => Some(RUBY_T_ARRAY),
+ Type::THash | Type::CHash => Some(RUBY_T_HASH),
+ Type::ImmSymbol => Some(RUBY_T_SYMBOL),
Type::TString | Type::CString => Some(RUBY_T_STRING),
- Type::TProc => Some(RUBY_T_DATA),
Type::Unknown | Type::UnknownImm | Type::UnknownHeap => None,
Type::BlockParamProxy => None,
}
@@ -205,7 +204,9 @@ impl Type {
Type::False => Some(rb_cFalseClass),
Type::Fixnum => Some(rb_cInteger),
Type::Flonum => Some(rb_cFloat),
- Type::ImmSymbol | Type::HeapSymbol => Some(rb_cSymbol),
+ Type::ImmSymbol => Some(rb_cSymbol),
+ Type::CArray => Some(rb_cArray),
+ Type::CHash => Some(rb_cHash),
Type::CString => Some(rb_cString),
_ => None,
}
@@ -256,6 +257,16 @@ impl Type {
return TypeDiff::Compatible(1);
}
+ // A CArray is also a TArray.
+ if self == Type::CArray && dst == Type::TArray {
+ return TypeDiff::Compatible(1);
+ }
+
+ // A CHash is also a THash.
+ if self == Type::CHash && dst == Type::THash {
+ return TypeDiff::Compatible(1);
+ }
+
// A CString is also a TString.
if self == Type::CString && dst == Type::TString {
return TypeDiff::Compatible(1);
@@ -293,91 +304,25 @@ pub enum TypeDiff {
}
#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)]
-#[repr(u8)]
-pub enum TempMappingKind
-{
- MapToStack = 0,
- MapToSelf = 1,
- MapToLocal = 2,
+pub enum TempMapping {
+ MapToStack(Type),
+ MapToSelf,
+ MapToLocal(u8),
}
-// Potential mapping of a value on the temporary stack to
-// self, a local variable or constant so that we can track its type
-//
-// The highest two bits represent TempMappingKind, and the rest of
-// the bits are used differently across different kinds.
-// * MapToStack: The lowest 5 bits are used for mapping Type.
-// * MapToSelf: The remaining bits are not used; the type is stored in self_type.
-// * MapToLocal: The lowest 3 bits store the index of a local variable.
-#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)]
-pub struct TempMapping(u8);
-
-impl TempMapping {
- pub fn map_to_stack(t: Type) -> TempMapping
- {
- let kind_bits = TempMappingKind::MapToStack as u8;
- let type_bits = t as u8;
- assert!(type_bits <= 0b11111);
- let bits = (kind_bits << 6) | (type_bits & 0b11111);
- TempMapping(bits)
- }
-
- pub fn map_to_self() -> TempMapping
- {
- let kind_bits = TempMappingKind::MapToSelf as u8;
- let bits = kind_bits << 6;
- TempMapping(bits)
- }
-
- pub fn map_to_local(local_idx: u8) -> TempMapping
- {
- let kind_bits = TempMappingKind::MapToLocal as u8;
- assert!(local_idx <= 0b111);
- let bits = (kind_bits << 6) | (local_idx & 0b111);
- TempMapping(bits)
- }
-
- pub fn without_type(&self) -> TempMapping
- {
- if self.get_kind() != TempMappingKind::MapToStack {
- return *self;
- }
-
- TempMapping::map_to_stack(Type::Unknown)
- }
-
- pub fn get_kind(&self) -> TempMappingKind
- {
- // Take the two highest bits
- let TempMapping(bits) = self;
- let kind_bits = bits >> 6;
- assert!(kind_bits <= 2);
- unsafe { transmute::<u8, TempMappingKind>(kind_bits) }
- }
-
- pub fn get_type(&self) -> Type
- {
- assert!(self.get_kind() == TempMappingKind::MapToStack);
-
- // Take the 5 lowest bits
- let TempMapping(bits) = self;
- let type_bits = bits & 0b11111;
- unsafe { transmute::<u8, Type>(type_bits) }
- }
-
- pub fn get_local_idx(&self) -> u8
- {
- assert!(self.get_kind() == TempMappingKind::MapToLocal);
-
- // Take the 3 lowest bits
- let TempMapping(bits) = self;
- bits & 0b111
+impl Default for TempMapping {
+ fn default() -> Self {
+ TempMapping::MapToStack(Type::default())
}
}
-impl Default for TempMapping {
- fn default() -> Self {
- TempMapping::map_to_stack(Type::Unknown)
+impl TempMapping {
+ /// Return TempMapping without type information in MapToStack
+ pub fn without_type(&self) -> TempMapping {
+ match self {
+ MapToStack(_) => TempMapping::MapToStack(Type::default()),
+ _ => *self,
+ }
}
}
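
// Illustrative sketch (not part of the patch): with TempMapping now a plain enum,
// reading the tracked type of a stack temp is a match instead of the old bit
// masking on a packed u8. The helper name is hypothetical.
fn temp_type_example(mapping: TempMapping) -> Option<Type> {
    match mapping {
        MapToStack(ty) => Some(ty), // the type is carried directly in the variant
        MapToSelf => None,          // the type is tracked in Context::self_type instead
        MapToLocal(_) => None,      // the type is tracked in Context::local_types instead
    }
}
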
@@ -400,50 +345,127 @@ impl From<Opnd> for YARVOpnd {
}
}
-/// Maximum index of stack temps that could be in a register
-pub const MAX_REG_TEMPS: u8 = 8;
-
-/// Bitmap of which stack temps are in a register
-#[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)]
-pub struct RegTemps(u8);
+/// Number of registers that can be used for stack temps or locals
+pub const MAX_MAPPED_REGS: usize = 5;
-impl RegTemps {
- pub fn get(&self, index: u8) -> bool {
- assert!(index < MAX_REG_TEMPS);
- (self.0 >> index) & 1 == 1
- }
+/// A stack slot or a local variable. The u8 is its index (less than 8, so it fits in 3 bits).
+#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)]
+pub enum RegOpnd {
+ Stack(u8),
+ Local(u8),
+}
- pub fn set(&mut self, index: u8, value: bool) {
- assert!(index < MAX_REG_TEMPS);
- if value {
- self.0 = self.0 | (1 << index);
- } else {
- self.0 = self.0 & !(1 << index);
+/// RegMapping manages a set of registers used for stack temps and locals.
+/// Each element of the array represents each of the registers.
+/// If an element is Some, the stack temp or the local uses a register.
+///
+/// Note that Opnd::InsnOut uses a separate set of registers at the moment.
+#[derive(Copy, Clone, Default, Eq, Hash, PartialEq)]
+pub struct RegMapping([Option<RegOpnd>; MAX_MAPPED_REGS]);
+
+impl RegMapping {
+ /// Return the index of the register for a given operand if allocated.
+ pub fn get_reg(&self, opnd: RegOpnd) -> Option<usize> {
+ self.0.iter().enumerate()
+ .find(|(_, &reg_opnd)| reg_opnd == Some(opnd))
+ .map(|(reg_idx, _)| reg_idx)
+ }
+
+ /// Set a given operand to the register at a given index.
+ pub fn set_reg(&mut self, opnd: RegOpnd, reg_idx: usize) {
+ assert!(self.0[reg_idx].is_none());
+ self.0[reg_idx] = Some(opnd);
+ }
+
+ /// Allocate a register for a given operand if available.
+ /// Return true if self is updated.
+ pub fn alloc_reg(&mut self, opnd: RegOpnd) -> bool {
+ // If a given opnd already has a register, skip allocation.
+ if self.get_reg(opnd).is_some() {
+ return false;
}
- }
- pub fn as_u8(&self) -> u8 {
- self.0
+ // If the index is too large to encode with 3 bits, give up.
+ match opnd {
+ RegOpnd::Stack(stack_idx) => if stack_idx >= MAX_CTX_TEMPS as u8 {
+ return false;
+ }
+ RegOpnd::Local(local_idx) => if local_idx >= MAX_CTX_LOCALS as u8 {
+ return false;
+ }
+ };
+
+ // Allocate a register if available.
+ if let Some(reg_idx) = self.find_unused_reg(opnd) {
+ self.0[reg_idx] = Some(opnd);
+ return true;
+ }
+ false
}
- /// Return true if there's a register that conflicts with a given stack_idx.
- pub fn conflicts_with(&self, stack_idx: u8) -> bool {
- let mut other_idx = stack_idx as usize % get_option!(num_temp_regs);
- while other_idx < MAX_REG_TEMPS as usize {
- if stack_idx as usize != other_idx && self.get(other_idx as u8) {
+ /// Deallocate a register for a given operand if in use.
+ /// Return true if self is updated.
+ pub fn dealloc_reg(&mut self, opnd: RegOpnd) -> bool {
+ for reg_opnd in self.0.iter_mut() {
+ if *reg_opnd == Some(opnd) {
+ *reg_opnd = None;
return true;
}
- other_idx += get_option!(num_temp_regs);
}
false
}
+
+ /// Find an available register and return the index of it.
+ fn find_unused_reg(&self, opnd: RegOpnd) -> Option<usize> {
+ let num_regs = get_option!(num_temp_regs);
+ if num_regs == 0 {
+ return None;
+ }
+ assert!(num_regs <= MAX_MAPPED_REGS);
+
+ // If the default index for the operand is available, use that to minimize
+ // discrepancies among Contexts.
+ let default_idx = match opnd {
+ RegOpnd::Stack(stack_idx) => stack_idx.as_usize() % num_regs,
+ RegOpnd::Local(local_idx) => num_regs - (local_idx.as_usize() % num_regs) - 1,
+ };
+ if self.0[default_idx].is_none() {
+ return Some(default_idx);
+ }
+
+ // If not, pick any other available register. Like default indexes, prefer
+ // lower indexes for Stack, and higher indexes for Local.
+ let mut index_temps = self.0.iter().enumerate();
+ match opnd {
+ RegOpnd::Stack(_) => index_temps.find(|(_, reg_opnd)| reg_opnd.is_none()),
+ RegOpnd::Local(_) => index_temps.rev().find(|(_, reg_opnd)| reg_opnd.is_none()),
+ }.map(|(index, _)| index)
+ }
+
+ /// Return a vector of RegOpnds that have an allocated register
+ pub fn get_reg_opnds(&self) -> Vec<RegOpnd> {
+ self.0.iter().filter_map(|&reg_opnd| reg_opnd).collect()
+ }
+
+ /// Count the number of registers that store a different operand from `dst`.
+ pub fn diff(&self, dst: RegMapping) -> usize {
+ self.0.iter().enumerate().filter(|&(reg_idx, &reg)| reg != dst.0[reg_idx]).count()
+ }
}
+impl fmt::Debug for RegMapping {
+ /// Print `[None, ...]` instead of the default `RegMapping([None, ...])`
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(fmt, "{:?}", self.0)
+ }
+}
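
// Illustrative sketch (not part of the patch): how RegMapping allocates registers,
// assuming get_option!(num_temp_regs) resolves to 5 (MAX_MAPPED_REGS). Per
// find_unused_reg, stack temps prefer low register indexes and locals prefer high ones.
fn reg_mapping_example() {
    let mut regs = RegMapping::default();        // [None, None, None, None, None]
    assert!(regs.alloc_reg(RegOpnd::Stack(0)));  // default index: 0 % 5 == 0
    assert!(regs.alloc_reg(RegOpnd::Local(0)));  // default index: 5 - 0 - 1 == 4
    assert!(!regs.alloc_reg(RegOpnd::Stack(0))); // already mapped, nothing to do
    assert_eq!(regs.get_reg(RegOpnd::Stack(0)), Some(0));
    assert_eq!(regs.get_reg(RegOpnd::Local(0)), Some(4));
    assert!(regs.dealloc_reg(RegOpnd::Local(0)));
    assert_eq!(regs.get_reg_opnds(), vec![RegOpnd::Stack(0)]);
}
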
+
+/// Maximum value of the chain depth (should fit in 5 bits)
+const CHAIN_DEPTH_MAX: u8 = 0b11111; // 31
+
/// Code generation context
/// Contains information we can use to specialize/optimize code
-/// There are a lot of context objects so we try to keep the size small.
#[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)]
-#[repr(packed)]
pub struct Context {
// Number of values currently on the temporary stack
stack_size: u8,
@@ -452,29 +474,742 @@ pub struct Context {
// This represents how far the JIT's SP is from the "real" SP
sp_offset: i8,
- /// Bitmap of which stack temps are in a register
- reg_temps: RegTemps,
+ /// Which stack temps or locals are in a register
+ reg_mapping: RegMapping,
+
+ // Depth of this block in the sidechain (eg: inline-cache chain)
+ // 5 bits in the encoding, max 31 (see CHAIN_DEPTH_MAX)
+ chain_depth: u8,
- /// Fields packed into u8
- /// - Lower 7 bits: Depth of this block in the sidechain (eg: inline-cache chain)
- /// - Top bit: Whether this code is the target of a JIT-to-JIT Ruby return
- /// ([Self::is_return_landing])
- chain_depth_return_landing: u8,
+ // Whether this code is the target of a JIT-to-JIT Ruby return ([Self::is_return_landing])
+ is_return_landing: bool,
+
+ // Whether the compilation of this code has been deferred ([Self::is_deferred])
+ is_deferred: bool,
// Type we track for self
self_type: Type,
// Local variable types we keep track of
- // We store 8 local types, requiring 4 bits each, for a total of 32 bits
- local_types: u32,
+ local_types: [Type; MAX_CTX_LOCALS],
+
+ // Temp mapping type/local_idx we track
+ temp_mapping: [TempMapping; MAX_CTX_TEMPS],
+
+ /// A pointer to a block ISEQ supplied by the caller. 0 if not inlined.
+ inline_block: Option<IseqPtr>,
+}
- // Temp mapping kinds we track
- // 8 temp mappings * 2 bits, total 16 bits
- temp_mapping_kind: u16,
+#[derive(Clone)]
+pub struct BitVector {
+ // Flat vector of bytes to write into
+ bytes: Vec<u8>,
- // Stack slot type/local_idx we track
- // 8 temp types * 4 bits, total 32 bits
- temp_payload: u32,
+ // Number of bits in use out of the bytes allocated
+ num_bits: usize,
+}
+
+impl BitVector {
+ pub fn new() -> Self {
+ Self {
+ bytes: Vec::with_capacity(4096),
+ num_bits: 0,
+ }
+ }
+
+ #[allow(unused)]
+ pub fn num_bits(&self) -> usize {
+ self.num_bits
+ }
+
+ // Total number of bytes taken
+ #[allow(unused)]
+ pub fn num_bytes(&self) -> usize {
+ (self.num_bits / 8) + if (self.num_bits % 8) != 0 { 1 } else { 0 }
+ }
+
+ // Write/append an unsigned integer value
+ fn push_uint(&mut self, mut val: u64, mut num_bits: usize) {
+ assert!(num_bits <= 64);
+
+ // Mask out bits above the number of bits requested
+ let mut val_bits = val;
+ if num_bits < 64 {
+ val_bits &= (1 << num_bits) - 1;
+ assert!(val == val_bits);
+ }
+
+ // Number of bits encoded in the last byte
+ let rem_bits = self.num_bits % 8;
+
+ // Encode as many bits as we can in this last byte
+ if rem_bits != 0 {
+ let num_enc = std::cmp::min(num_bits, 8 - rem_bits);
+ let bit_mask = (1 << num_enc) - 1;
+ let frac_bits = (val & bit_mask) << rem_bits;
+ let frac_bits: u8 = frac_bits.try_into().unwrap();
+ let last_byte_idx = self.bytes.len() - 1;
+ self.bytes[last_byte_idx] |= frac_bits;
+
+ self.num_bits += num_enc;
+ num_bits -= num_enc;
+ val >>= num_enc;
+ }
+
+ // While we have bits left to encode
+ while num_bits > 0 {
+ // Grow with a 1.2x growth factor instead of 2x
+ assert!(self.num_bits % 8 == 0);
+ let num_bytes = self.num_bits / 8;
+ if num_bytes == self.bytes.capacity() {
+ self.bytes.reserve_exact(self.bytes.len() / 5);
+ }
+
+ let bits = val & 0xFF;
+ let bits: u8 = bits.try_into().unwrap();
+ self.bytes.push(bits);
+
+ let bits_to_encode = std::cmp::min(num_bits, 8);
+ self.num_bits += bits_to_encode;
+ num_bits -= bits_to_encode;
+ val >>= bits_to_encode;
+ }
+ }
+
+ fn push_u8(&mut self, val: u8) {
+ self.push_uint(val as u64, 8);
+ }
+
+ fn push_u5(&mut self, val: u8) {
+ assert!(val <= 0b11111);
+ self.push_uint(val as u64, 5);
+ }
+
+ fn push_u4(&mut self, val: u8) {
+ assert!(val <= 0b1111);
+ self.push_uint(val as u64, 4);
+ }
+
+ fn push_u3(&mut self, val: u8) {
+ assert!(val <= 0b111);
+ self.push_uint(val as u64, 3);
+ }
+
+ fn push_u2(&mut self, val: u8) {
+ assert!(val <= 0b11);
+ self.push_uint(val as u64, 2);
+ }
+
+ fn push_u1(&mut self, val: u8) {
+ assert!(val <= 0b1);
+ self.push_uint(val as u64, 1);
+ }
+
+ fn push_bool(&mut self, val: bool) {
+ self.push_u1(if val { 1 } else { 0 });
+ }
+
+ // Push a context encoding opcode
+ fn push_op(&mut self, op: CtxOp) {
+ self.push_u4(op as u8);
+ }
+
+ // Read a uint value at a given bit index
+ // The bit index is incremented after the value is read
+ fn read_uint(&self, bit_idx: &mut usize, mut num_bits: usize) -> u64 {
+ let start_bit_idx = *bit_idx;
+ let mut cur_idx = *bit_idx;
+
+ // Read the bits in the first byte
+ let bit_mod = cur_idx % 8;
+ let bits_in_byte = self.bytes[cur_idx / 8] >> bit_mod;
+
+ let num_bits_in_byte = std::cmp::min(num_bits, 8 - bit_mod);
+ cur_idx += num_bits_in_byte;
+ num_bits -= num_bits_in_byte;
+
+ let mut out_bits = (bits_in_byte as u64) & ((1 << num_bits_in_byte) - 1);
+
+ // While we have bits left to read
+ while num_bits > 0 {
+ let num_bits_in_byte = std::cmp::min(num_bits, 8);
+ assert!(cur_idx % 8 == 0);
+ let byte = self.bytes[cur_idx / 8] as u64;
+
+ let bits_in_byte = byte & ((1 << num_bits) - 1);
+ out_bits |= bits_in_byte << (cur_idx - start_bit_idx);
+
+ // Move to the next byte/offset
+ cur_idx += num_bits_in_byte;
+ num_bits -= num_bits_in_byte;
+ }
+
+ // Update the read index
+ *bit_idx = cur_idx;
+
+ out_bits
+ }
+
+ fn read_u8(&self, bit_idx: &mut usize) -> u8 {
+ self.read_uint(bit_idx, 8) as u8
+ }
+
+ fn read_u5(&self, bit_idx: &mut usize) -> u8 {
+ self.read_uint(bit_idx, 5) as u8
+ }
+
+ fn read_u4(&self, bit_idx: &mut usize) -> u8 {
+ self.read_uint(bit_idx, 4) as u8
+ }
+
+ fn read_u3(&self, bit_idx: &mut usize) -> u8 {
+ self.read_uint(bit_idx, 3) as u8
+ }
+
+ fn read_u2(&self, bit_idx: &mut usize) -> u8 {
+ self.read_uint(bit_idx, 2) as u8
+ }
+
+ fn read_u1(&self, bit_idx: &mut usize) -> u8 {
+ self.read_uint(bit_idx, 1) as u8
+ }
+
+ fn read_bool(&self, bit_idx: &mut usize) -> bool {
+ self.read_u1(bit_idx) != 0
+ }
+
+ fn read_op(&self, bit_idx: &mut usize) -> CtxOp {
+ unsafe { std::mem::transmute(self.read_u4(bit_idx)) }
+ }
+}
+
+impl fmt::Debug for BitVector {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ // We print the higher bytes first
+ for (idx, byte) in self.bytes.iter().enumerate().rev() {
+ write!(f, "{:08b}", byte)?;
+
+ // Insert a separator between each byte
+ if idx > 0 {
+ write!(f, "|")?;
+ }
+ }
+
+ Ok(())
+ }
+}
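
// Illustrative sketch (not part of the patch): how push_uint packs values across
// byte boundaries. Pushing 3 zero bits and then the 8-bit value 0xFF yields 11 bits
// over two bytes: byte 0 holds 0b11111000 (the low 5 bits of 0xFF above the 3 zero
// bits) and byte 1 holds 0b00000111 (the remaining high 3 bits).
fn bitvector_packing_example() {
    let mut bits = BitVector::new();
    bits.push_uint(0, 3);
    bits.push_u8(0xFF);
    assert_eq!(bits.num_bits(), 11);
    assert_eq!(bits.read_uint(&mut 3, 8), 0xFF); // reads back from bit index 3
}
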
+
+#[cfg(test)]
+mod bitvector_tests {
+ use super::*;
+
+ #[test]
+ fn write_3() {
+ let mut arr = BitVector::new();
+ arr.push_uint(3, 2);
+ assert!(arr.read_uint(&mut 0, 2) == 3);
+ }
+
+ #[test]
+ fn write_11() {
+ let mut arr = BitVector::new();
+ arr.push_uint(1, 1);
+ arr.push_uint(1, 1);
+ assert!(arr.read_uint(&mut 0, 2) == 3);
+ }
+
+ #[test]
+ fn write_11_overlap() {
+ let mut arr = BitVector::new();
+ arr.push_uint(0, 7);
+ arr.push_uint(3, 2);
+ arr.push_uint(1, 1);
+
+ //dbg!(arr.read_uint(7, 2));
+ assert!(arr.read_uint(&mut 7, 2) == 3);
+ }
+
+ #[test]
+ fn write_ff_0() {
+ let mut arr = BitVector::new();
+ arr.push_uint(0xFF, 8);
+ assert!(arr.read_uint(&mut 0, 8) == 0xFF);
+ }
+
+ #[test]
+ fn write_ff_3() {
+ // Write 0xFF at bit index 3
+ let mut arr = BitVector::new();
+ arr.push_uint(0, 3);
+ arr.push_uint(0xFF, 8);
+ assert!(arr.read_uint(&mut 3, 8) == 0xFF);
+ }
+
+ #[test]
+ fn write_ff_sandwich() {
+ // Write 0xFF sandwiched between zeros
+ let mut arr = BitVector::new();
+ arr.push_uint(0, 3);
+ arr.push_u8(0xFF);
+ arr.push_uint(0, 3);
+ assert!(arr.read_uint(&mut 3, 8) == 0xFF);
+ }
+
+ #[test]
+ fn write_read_u32_max() {
+ let mut arr = BitVector::new();
+ arr.push_uint(0xFF_FF_FF_FF, 32);
+ assert!(arr.read_uint(&mut 0, 32) == 0xFF_FF_FF_FF);
+ }
+
+ #[test]
+ fn write_read_u32_max_64b() {
+ let mut arr = BitVector::new();
+ arr.push_uint(0xFF_FF_FF_FF, 64);
+ assert!(arr.read_uint(&mut 0, 64) == 0xFF_FF_FF_FF);
+ }
+
+ #[test]
+ fn write_read_u64_max() {
+ let mut arr = BitVector::new();
+ arr.push_uint(u64::MAX, 64);
+ assert!(arr.read_uint(&mut 0, 64) == u64::MAX);
+ }
+
+ #[test]
+ fn encode_default() {
+ let mut bits = BitVector::new();
+ let ctx = Context::default();
+ let start_idx = ctx.encode_into(&mut bits);
+ assert!(start_idx == 0);
+ assert!(bits.num_bits() > 0);
+ assert!(bits.num_bytes() > 0);
+
+ // Make sure that the round trip matches the input
+ let ctx2 = Context::decode_from(&bits, 0);
+ assert!(ctx2 == ctx);
+ }
+
+ #[test]
+ fn encode_default_2x() {
+ let mut bits = BitVector::new();
+
+ let ctx0 = Context::default();
+ let idx0 = ctx0.encode_into(&mut bits);
+
+ let mut ctx1 = Context::default();
+ ctx1.reg_mapping = RegMapping([Some(RegOpnd::Stack(0)), None, None, None, None]);
+ let idx1 = ctx1.encode_into(&mut bits);
+
+ // Make sure that we can encode two contexts successively
+ let ctx0_dec = Context::decode_from(&bits, idx0);
+ let ctx1_dec = Context::decode_from(&bits, idx1);
+ assert!(ctx0_dec == ctx0);
+ assert!(ctx1_dec == ctx1);
+ }
+
+ #[test]
+ fn regress_reg_mapping() {
+ let mut bits = BitVector::new();
+ let mut ctx = Context::default();
+ ctx.reg_mapping = RegMapping([Some(RegOpnd::Stack(0)), None, None, None, None]);
+ ctx.encode_into(&mut bits);
+
+ let b0 = bits.read_u1(&mut 0);
+ assert!(b0 == 1);
+
+ // Make sure that the round trip matches the input
+ let ctx2 = Context::decode_from(&bits, 0);
+ assert!(ctx2 == ctx);
+ }
+}
+
+// Context encoding opcodes (4 bits)
+#[derive(Debug, Copy, Clone)]
+#[repr(u8)]
+enum CtxOp {
+ // Self type (4 bits)
+ SetSelfType = 0,
+
+ // Local idx (3 bits), temp type (4 bits)
+ SetLocalType,
+
+ // Map a stack temp to the stack with a known type
+ // Temp idx (3 bits), known type (4 bits)
+ SetTempType,
+
+ // Map stack temp to a local variable
+ // Temp idx (3 bits), local idx (3 bits)
+ MapTempLocal,
+
+ // Map a stack temp to self
+ // Temp idx (3 bits)
+ MapTempSelf,
+
+ // Set inline block pointer (8 bytes)
+ SetInlineBlock,
+
+ // End of encoding
+ EndOfCode,
+}
+
+// Number of entries in the context cache
+const CTX_ENCODE_CACHE_SIZE: usize = 1024;
+const CTX_DECODE_CACHE_SIZE: usize = 1024;
+
+// Cache of the last contexts encoded/decoded
+// Empirically this saves a few percent of memory and speeds up compilation
+// We can experiment with varying the size of this cache
+pub type CtxEncodeCache = [(Context, u32); CTX_ENCODE_CACHE_SIZE];
+static mut CTX_ENCODE_CACHE: Option<Box<CtxEncodeCache>> = None;
+
+// Cache of the last contexts encoded/decoded
+// This speeds up compilation
+pub type CtxDecodeCache = [(Context, u32); CTX_DECODE_CACHE_SIZE];
+static mut CTX_DECODE_CACHE: Option<Box<CtxDecodeCache>> = None;
+
+// Size of the context cache in bytes
+pub const CTX_ENCODE_CACHE_BYTES: usize = std::mem::size_of::<CtxEncodeCache>();
+pub const CTX_DECODE_CACHE_BYTES: usize = std::mem::size_of::<CtxDecodeCache>();
+
+impl Context {
+ // Encode a context into the global context data, or return
+ // a cached previously encoded offset if one is found
+ pub fn encode(&self) -> u32 {
+ incr_counter!(num_contexts_encoded);
+
+ if *self == Context::default() {
+ incr_counter!(context_cache_hits);
+ return 0;
+ }
+
+ if let Some(idx) = Self::encode_cache_get(self) {
+ incr_counter!(context_cache_hits);
+ debug_assert!(Self::decode(idx) == *self);
+ return idx;
+ }
+
+ let context_data = CodegenGlobals::get_context_data();
+
+ // Make sure we don't use offset 0 because
+ // it's reserved for the default context
+ if context_data.num_bits() == 0 {
+ context_data.push_u1(0);
+ }
+
+ let idx = self.encode_into(context_data);
+ let idx: u32 = idx.try_into().unwrap();
+
+ // Save this offset into the cache
+ Self::encode_cache_set(self, idx);
+ Self::decode_cache_set(self, idx);
+
+ // In debug mode, check that the round-trip decoding always matches
+ debug_assert!(Self::decode(idx) == *self);
+
+ idx
+ }
+
+ pub fn decode(start_idx: u32) -> Context {
+ if start_idx == 0 {
+ return Context::default();
+ };
+
+ if let Some(ctx) = Self::decode_cache_get(start_idx) {
+ return ctx;
+ }
+
+ let context_data = CodegenGlobals::get_context_data();
+ let ctx = Self::decode_from(context_data, start_idx as usize);
+
+ Self::encode_cache_set(&ctx, start_idx);
+ Self::decode_cache_set(&ctx, start_idx);
+
+ ctx
+ }
+
+ // Store an entry in a cache of recently encoded/decoded contexts for encoding
+ fn encode_cache_set(ctx: &Context, idx: u32)
+ {
+ // Compute the hash for this context
+ let mut hasher = DefaultHasher::new();
+ ctx.hash(&mut hasher);
+ let ctx_hash = hasher.finish() as usize;
+
+ unsafe {
+ // Lazily initialize the context cache
+ if CTX_ENCODE_CACHE == None {
+ // Here we use the vec syntax to avoid allocating the large table on the stack,
+ // as this can cause a stack overflow
+ let tbl = vec![(Context::default(), 0); CTX_ENCODE_CACHE_SIZE].into_boxed_slice().try_into().unwrap();
+ CTX_ENCODE_CACHE = Some(tbl);
+ }
+
+ // Write a cache entry for this context
+ let cache = CTX_ENCODE_CACHE.as_mut().unwrap();
+ cache[ctx_hash % CTX_ENCODE_CACHE_SIZE] = (*ctx, idx);
+ }
+ }
+
+ // Store an entry in a cache of recently encoded/decoded contexts for decoding
+ fn decode_cache_set(ctx: &Context, idx: u32) {
+ unsafe {
+ // Lazily initialize the context cache
+ if CTX_DECODE_CACHE == None {
+ // Here we use the vec syntax to avoid allocating the large table on the stack,
+ // as this can cause a stack overflow
+ let tbl = vec![(Context::default(), 0); CTX_DECODE_CACHE_SIZE].into_boxed_slice().try_into().unwrap();
+ CTX_DECODE_CACHE = Some(tbl);
+ }
+
+ // Write a cache entry for this context
+ let cache = CTX_DECODE_CACHE.as_mut().unwrap();
+ cache[idx as usize % CTX_DECODE_CACHE_SIZE] = (*ctx, idx);
+ }
+ }
+
+ // Lookup the context in a cache of recently encoded/decoded contexts for encoding
+ fn encode_cache_get(ctx: &Context) -> Option<u32>
+ {
+ // Compute the hash for this context
+ let mut hasher = DefaultHasher::new();
+ ctx.hash(&mut hasher);
+ let ctx_hash = hasher.finish() as usize;
+
+ unsafe {
+ if CTX_ENCODE_CACHE == None {
+ return None;
+ }
+
+ let cache = CTX_ENCODE_CACHE.as_mut().unwrap();
+
+ // Check that the context for this cache entry matches
+ let cache_entry = &cache[ctx_hash % CTX_ENCODE_CACHE_SIZE];
+ if cache_entry.0 == *ctx {
+ debug_assert!(cache_entry.1 != 0);
+ return Some(cache_entry.1);
+ }
+
+ return None;
+ }
+ }
+
+ // Lookup the context in a cache of recently encoded/decoded contexts for decoding
+ fn decode_cache_get(start_idx: u32) -> Option<Context> {
+ unsafe {
+ if CTX_DECODE_CACHE == None {
+ return None;
+ }
+
+ let cache = CTX_DECODE_CACHE.as_mut().unwrap();
+
+ // Check that the start_idx for this cache entry matches
+ let cache_entry = &cache[start_idx as usize % CTX_DECODE_CACHE_SIZE];
+ if cache_entry.1 == start_idx {
+ return Some(cache_entry.0);
+ }
+
+ return None;
+ }
+ }
+
+ // Encode into a compressed context representation in a bit vector
+ fn encode_into(&self, bits: &mut BitVector) -> usize {
+ let start_idx = bits.num_bits();
+
+ // Most of the time, the stack size is small and sp offset has the same value
+ if (self.stack_size as i64) == (self.sp_offset as i64) && self.stack_size < 4 {
+ // One single bit to signify a compact stack_size/sp_offset encoding
+ debug_assert!(self.sp_offset >= 0);
+ bits.push_u1(1);
+ bits.push_u2(self.stack_size);
+ } else {
+ // Full stack size encoding
+ bits.push_u1(0);
+
+ // Number of values currently on the temporary stack
+ bits.push_u8(self.stack_size);
+
+ // sp_offset: i8,
+ bits.push_u8(self.sp_offset as u8);
+ }
+
+ // Which stack temps or locals are in a register
+ for &temp in self.reg_mapping.0.iter() {
+ if let Some(temp) = temp {
+ bits.push_u1(1); // Some
+ match temp {
+ RegOpnd::Stack(stack_idx) => {
+ bits.push_u1(0); // Stack
+ bits.push_u3(stack_idx);
+ }
+ RegOpnd::Local(local_idx) => {
+ bits.push_u1(1); // Local
+ bits.push_u3(local_idx);
+ }
+ }
+ } else {
+ bits.push_u1(0); // None
+ }
+ }
+
+ bits.push_bool(self.is_deferred);
+ bits.push_bool(self.is_return_landing);
+
+ // The chain depth is most often 0 or 1
+ if self.chain_depth < 2 {
+ bits.push_u1(0);
+ bits.push_u1(self.chain_depth);
+
+ } else {
+ bits.push_u1(1);
+ bits.push_u5(self.chain_depth);
+ }
+
+ // Encode the self type if known
+ if self.self_type != Type::Unknown {
+ bits.push_op(CtxOp::SetSelfType);
+ bits.push_u4(self.self_type as u8);
+ }
+
+ // Encode the local types if known
+ for local_idx in 0..MAX_CTX_LOCALS {
+ let t = self.get_local_type(local_idx);
+ if t != Type::Unknown {
+ bits.push_op(CtxOp::SetLocalType);
+ bits.push_u3(local_idx as u8);
+ bits.push_u4(t as u8);
+ }
+ }
+
+ // Encode stack temps
+ for stack_idx in 0..MAX_CTX_TEMPS {
+ let mapping = self.get_temp_mapping(stack_idx);
+
+ match mapping {
+ MapToStack(temp_type) => {
+ if temp_type != Type::Unknown {
+ // Temp idx (3 bits), known type (4 bits)
+ bits.push_op(CtxOp::SetTempType);
+ bits.push_u3(stack_idx as u8);
+ bits.push_u4(temp_type as u8);
+ }
+ }
+
+ MapToLocal(local_idx) => {
+ bits.push_op(CtxOp::MapTempLocal);
+ bits.push_u3(stack_idx as u8);
+ bits.push_u3(local_idx);
+ }
+
+ MapToSelf => {
+ // Temp idx (3 bits)
+ bits.push_op(CtxOp::MapTempSelf);
+ bits.push_u3(stack_idx as u8);
+ }
+ }
+ }
+
+ // Inline block pointer
+ if let Some(iseq) = self.inline_block {
+ bits.push_op(CtxOp::SetInlineBlock);
+ bits.push_uint(iseq as u64, 64);
+ }
+
+ // TODO: should we add an op for end-of-encoding,
+ // or store num ops at the beginning?
+ bits.push_op(CtxOp::EndOfCode);
+
+ start_idx
+ }
+
+ // Decode a compressed context representation from a bit vector
+ fn decode_from(bits: &BitVector, start_idx: usize) -> Context {
+ let mut ctx = Context::default();
+
+ let mut idx = start_idx;
+
+ // Small vs large stack size encoding
+ if bits.read_u1(&mut idx) == 1 {
+ ctx.stack_size = bits.read_u2(&mut idx);
+ ctx.sp_offset = ctx.stack_size as i8;
+ } else {
+ ctx.stack_size = bits.read_u8(&mut idx);
+ let sp_offset_bits = bits.read_u8(&mut idx);
+ ctx.sp_offset = sp_offset_bits as i8;
+
+ // If the top bit is set, then the sp offset must be negative
+ debug_assert!(!( (sp_offset_bits & 0x80) != 0 && ctx.sp_offset > 0 ));
+ }
+
+ // Which stack temps or locals are in a register
+ for index in 0..MAX_MAPPED_REGS {
+ if bits.read_u1(&mut idx) == 1 { // Some
+ let temp = if bits.read_u1(&mut idx) == 0 { // RegMapping::Stack
+ RegOpnd::Stack(bits.read_u3(&mut idx))
+ } else {
+ RegOpnd::Local(bits.read_u3(&mut idx))
+ };
+ ctx.reg_mapping.0[index] = Some(temp);
+ }
+ }
+
+ ctx.is_deferred = bits.read_bool(&mut idx);
+ ctx.is_return_landing = bits.read_bool(&mut idx);
+
+ if bits.read_u1(&mut idx) == 0 {
+ ctx.chain_depth = bits.read_u1(&mut idx)
+ } else {
+ ctx.chain_depth = bits.read_u5(&mut idx)
+ }
+
+ loop {
+ //println!("reading op");
+ let op = bits.read_op(&mut idx);
+ //println!("got op {:?}", op);
+
+ match op {
+ CtxOp::SetSelfType => {
+ ctx.self_type = unsafe { transmute(bits.read_u4(&mut idx)) };
+ }
+
+ CtxOp::SetLocalType => {
+ let local_idx = bits.read_u3(&mut idx) as usize;
+ let t = unsafe { transmute(bits.read_u4(&mut idx)) };
+ ctx.set_local_type(local_idx, t);
+ }
+
+ // Map temp to stack (known type)
+ CtxOp::SetTempType => {
+ let temp_idx = bits.read_u3(&mut idx) as usize;
+ let temp_type = unsafe { transmute(bits.read_u4(&mut idx)) };
+ ctx.set_temp_mapping(temp_idx, TempMapping::MapToStack(temp_type));
+ }
+
+ // Map temp to local
+ CtxOp::MapTempLocal => {
+ let temp_idx = bits.read_u3(&mut idx) as usize;
+ let local_idx = bits.read_u3(&mut idx);
+ ctx.set_temp_mapping(temp_idx, TempMapping::MapToLocal(local_idx));
+ }
+
+ // Map temp to self
+ CtxOp::MapTempSelf => {
+ let temp_idx = bits.read_u3(&mut idx) as usize;
+ ctx.set_temp_mapping(temp_idx, TempMapping::MapToSelf);
+ }
+
+ // Inline block pointer
+ CtxOp::SetInlineBlock => {
+ ctx.inline_block = Some(bits.read_uint(&mut idx, 64) as IseqPtr);
+ }
+
+ CtxOp::EndOfCode => break,
+ }
+ }
+
+ ctx
+ }
}
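As a quick illustration of the encode/decode pair above, a minimal round-trip sketch (not part of the patch; it assumes Context derives PartialEq and Debug, and that the test lives inside core.rs where the private fields are visible):

    #[cfg(test)]
    mod ctx_encode_sketch {
        use super::*;

        #[test]
        fn context_round_trips_through_the_bit_encoding() {
            let mut ctx = Context::default();
            ctx.stack_size = 3;
            ctx.sp_offset = 3;
            ctx.chain_depth = 1; // fits the 1-bit fast path for depths 0 and 1

            // encode() appends the bits and hands back a u32 handle;
            // decode() reads the same fields back out of the bit vector.
            let encoded: u32 = Context::encode(&ctx);
            assert_eq!(Context::decode(encoded), ctx);
        }
    }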
/// Tuple of (iseq, idx) used to identify basic blocks
@@ -638,7 +1373,7 @@ impl BranchTarget {
}
}
- fn get_ctx(&self) -> Context {
+ fn get_ctx(&self) -> u32 {
match self {
BranchTarget::Stub(stub) => stub.ctx,
BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.ctx,
@@ -665,14 +1400,14 @@ struct BranchStub {
address: Option<CodePtr>,
iseq: Cell<IseqPtr>,
iseq_idx: IseqIdx,
- ctx: Context,
+ ctx: u32,
}
/// Store info about an outgoing branch in a code segment
/// Note: care must be taken to minimize the size of branch objects
pub struct Branch {
// Block this is attached to
- block: BlockRef,
+ block: Cell<BlockRef>,
// Positions where the generated code starts and ends
start_addr: CodePtr,
@@ -770,12 +1505,13 @@ impl std::fmt::Debug for Branch {
impl PendingBranch {
/// Set up a branch target at `target_idx`. Find an existing block to branch to
/// or generate a stub for one.
+ #[must_use]
fn set_target(
&self,
target_idx: u32,
target: BlockId,
ctx: &Context,
- ocb: &mut OutlinedCb,
+ jit: &mut JITState,
) -> Option<CodePtr> {
// If the block already exists
if let Some(blockref) = find_block_version(target, ctx) {
@@ -787,10 +1523,13 @@ impl PendingBranch {
return Some(block.start_addr);
}
+ // Compress/encode the context
+ let ctx = Context::encode(ctx);
+
+ // The branch struct is uninitialized right now but has a stable address.
// We make sure the stub runs after the branch is initialized.
let branch_struct_addr = self.uninit_branch.as_ptr() as usize;
- let stub_addr = gen_branch_stub(ctx, ocb, branch_struct_addr, target_idx);
+ let stub_addr = gen_branch_stub(ctx, jit.iseq, jit.get_ocb(), branch_struct_addr, target_idx);
if let Some(stub_addr) = stub_addr {
// Fill the branch target with a stub
@@ -798,7 +1537,7 @@ impl PendingBranch {
address: Some(stub_addr),
iseq: Cell::new(target.iseq),
iseq_idx: target.idx,
- ctx: *ctx,
+ ctx,
})))));
}
@@ -809,7 +1548,7 @@ impl PendingBranch {
fn into_branch(mut self, uninit_block: BlockRef) -> BranchRef {
// Make the branch
let branch = Branch {
- block: uninit_block,
+ block: Cell::new(uninit_block),
start_addr: self.start_addr.get().unwrap(),
end_addr: Cell::new(self.end_addr.get().unwrap()),
targets: self.targets,
@@ -840,6 +1579,7 @@ impl PendingBranch {
}
branch.assert_layout();
+ incr_counter!(compiled_branch_count);
branchref
}
@@ -891,21 +1631,18 @@ pub struct Block {
// Context at the start of the block
// This should never be mutated
- ctx: Context,
+ ctx: u32,
// Positions where the generated code starts and ends
start_addr: CodePtr,
end_addr: Cell<CodePtr>,
// List of incoming branches (from predecessors)
- // These are reference counted (ownership shared between predecessor and successors)
incoming: MutableBranchList,
- // NOTE: we might actually be able to store the branches here without refcounting
- // however, using a RefCell makes it easy to get a pointer to Branch objects
- //
// List of outgoing branches (to successors)
- outgoing: Box<[BranchRef]>,
+ // Infrequently mutated for control flow graph edits that save memory.
+ outgoing: MutableBranchList,
// FIXME: should these be code pointers instead?
// Offsets for GC managed objects in the mainline code block
@@ -972,6 +1709,26 @@ impl MutableBranchList {
current_list.push(branch);
self.0.set(current_list.into_boxed_slice());
}
+
+ /// Iterate through branches in the list by moving out of the cell
+ /// and then putting it back when done. Modifications to this cell
+ /// during iteration will be discarded.
+ ///
+ /// Assumes panic=abort since panic=unwind during iteration would
+ /// leave the cell empty.
+ fn for_each(&self, mut f: impl FnMut(BranchRef)) {
+ let list = self.0.take();
+ for branch in list.iter() {
+ f(*branch);
+ }
+ self.0.set(list);
+ }
+
+ /// Length of the list.
+ fn len(&self) -> usize {
+ // SAFETY: No cell mutation inside unsafe.
+ unsafe { self.0.ref_unchecked().len() }
+ }
}
impl fmt::Debug for MutableBranchList {
@@ -979,7 +1736,7 @@ impl fmt::Debug for MutableBranchList {
// SAFETY: the derived Clone for boxed slices does not mutate this Cell
let branches = unsafe { self.0.ref_unchecked().clone() };
- formatter.debug_list().entries(branches.into_iter()).finish()
+ formatter.debug_list().entries(branches.iter()).finish()
}
}
@@ -992,7 +1749,7 @@ pub struct IseqPayload {
// Basic block versions
pub version_map: VersionMap,
- // Indexes of code pages used by this this ISEQ
+ // Indexes of code pages used by this ISEQ
pub pages: HashSet<usize>,
// List of ISEQ entry codes
@@ -1061,16 +1818,7 @@ pub fn for_each_iseq<F: FnMut(IseqPtr)>(mut callback: F) {
callback(iseq);
}
let mut data: &mut dyn FnMut(IseqPtr) = &mut callback;
- unsafe { rb_yjit_for_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) };
-}
-
-/// Iterate over all ISEQ payloads
-pub fn for_each_iseq_payload<F: FnMut(&IseqPayload)>(mut callback: F) {
- for_each_iseq(|iseq| {
- if let Some(iseq_payload) = get_iseq_payload(iseq) {
- callback(iseq_payload);
- }
- });
+ unsafe { rb_jit_for_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) };
}
/// Iterate over all on-stack ISEQs
@@ -1117,8 +1865,12 @@ pub fn for_each_off_stack_iseq_payload<F: FnMut(&mut IseqPayload)>(mut callback:
/// Free the per-iseq payload
#[no_mangle]
-pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) {
+pub extern "C" fn rb_yjit_iseq_free(iseq: IseqPtr) {
+ // Free invariants for the ISEQ
+ iseq_free_invariants(iseq);
+
let payload = {
+ let payload = unsafe { rb_iseq_get_yjit_payload(iseq) };
if payload.is_null() {
// Nothing to free.
return;
@@ -1168,7 +1920,7 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) {
// For aliasing, having the VM lock hopefully also implies that no one
// else has an overlapping &mut IseqPayload.
unsafe {
- rb_yjit_assert_holding_vm_lock();
+ rb_assert_holding_vm_lock();
&*(payload as *const IseqPayload)
}
};
@@ -1202,7 +1954,7 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) {
}
// Mark outgoing branch entries
- for branch in block.outgoing.iter() {
+ block.outgoing.for_each(|branch| {
let branch = unsafe { branch.as_ref() };
for target in branch.targets.iter() {
// SAFETY: no mutation inside unsafe
@@ -1222,7 +1974,7 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) {
unsafe { rb_gc_mark_movable(target_iseq.into()) };
}
}
- }
+ });
// Mark references to objects in generated code.
// Skip for dead blocks since they shouldn't run.
@@ -1245,7 +1997,8 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) {
/// GC callback for updating GC objects in the per-iseq payload.
/// This is a mirror of [rb_yjit_iseq_mark].
#[no_mangle]
-pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
+pub extern "C" fn rb_yjit_iseq_update_references(iseq: IseqPtr) {
+ let payload = unsafe { rb_iseq_get_yjit_payload(iseq) };
let payload = if payload.is_null() {
// Nothing to update.
return;
@@ -1256,7 +2009,7 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
// For aliasing, having the VM lock hopefully also implies that no one
// else has an overlapping &mut IseqPayload.
unsafe {
- rb_yjit_assert_holding_vm_lock();
+ rb_assert_holding_vm_lock();
&*(payload as *const IseqPayload)
}
};
@@ -1282,13 +2035,6 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
block_update_references(block, cb, true);
}
- // Note that we would have returned already if YJIT is off.
- cb.mark_all_executable();
-
- CodegenGlobals::get_outlined_cb()
- .unwrap()
- .mark_all_executable();
-
return;
fn block_update_references(block: &Block, cb: &mut CodeBlock, dead: bool) {
@@ -1302,7 +2048,7 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
}
// Update outgoing branch entries
- for branch in block.outgoing.iter() {
+ block.outgoing.for_each(|branch| {
let branch = unsafe { branch.as_ref() };
for target in branch.targets.iter() {
// SAFETY: no mutation inside unsafe
@@ -1326,7 +2072,7 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
unsafe { target.ref_unchecked().as_ref().unwrap().set_iseq(updated_iseq) };
}
}
- }
+ });
// Update references to objects in generated code.
// Skip for dead blocks since they shouldn't run and
@@ -1345,11 +2091,9 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
// Only write when the VALUE moves, to be copy-on-write friendly.
if new_addr != object {
- for (byte_idx, &byte) in new_addr.as_u64().to_le_bytes().iter().enumerate() {
- let byte_code_ptr = value_code_ptr.add_bytes(byte_idx);
- cb.write_mem(byte_code_ptr, byte)
- .expect("patching existing code should be within bounds");
- }
+ // SAFETY: Since we already set code memory writable before the compacting phase,
+ // we can use raw memory accesses directly.
+ unsafe { value_ptr.write_unaligned(new_addr); }
}
}
}
@@ -1357,6 +2101,34 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
}
}
+/// Mark all code memory as writable.
+/// This function is useful for garbage collectors that update references in JIT-compiled code in
+/// bulk.
+#[no_mangle]
+pub extern "C" fn rb_yjit_mark_all_writeable() {
+ if CodegenGlobals::has_instance() {
+ CodegenGlobals::get_inline_cb().mark_all_writeable();
+
+ CodegenGlobals::get_outlined_cb()
+ .unwrap()
+ .mark_all_writeable();
+ }
+}
+
+/// Mark all code memory as executable.
+/// This function is useful for garbage collectors that update references in JIT-compiled code in
+/// bulk.
+#[no_mangle]
+pub extern "C" fn rb_yjit_mark_all_executable() {
+ if CodegenGlobals::has_instance() {
+ CodegenGlobals::get_inline_cb().mark_all_executable();
+
+ CodegenGlobals::get_outlined_cb()
+ .unwrap()
+ .mark_all_executable();
+ }
+}
+
/// Get all blocks for a particular place in an iseq.
fn get_version_list(blockid: BlockId) -> Option<&'static mut VersionList> {
let insn_idx = blockid.idx.as_usize();
@@ -1394,15 +2166,28 @@ pub fn take_version_list(blockid: BlockId) -> VersionList {
}
}
-/// Count the number of block versions matching a given blockid
-fn get_num_versions(blockid: BlockId) -> usize {
+/// Count the number of block versions that match a given BlockId and part of a Context
+fn get_num_versions(blockid: BlockId, ctx: &Context) -> usize {
let insn_idx = blockid.idx.as_usize();
match get_iseq_payload(blockid.iseq) {
+
+ // FIXME: this counting logic is going to be expensive.
+ // We should avoid it if possible
+
Some(payload) => {
payload
.version_map
.get(insn_idx)
- .map(|versions| versions.len())
+ .map(|versions| {
+ versions.iter().filter(|&&version| {
+ let version_ctx = Context::decode(unsafe { version.as_ref() }.ctx);
+ // Inline versions are counted separately towards MAX_INLINE_VERSIONS.
+ version_ctx.inline() == ctx.inline() &&
+ // find_block_version() finds only blocks with compatible reg_mapping,
+ // so count only versions with compatible reg_mapping.
+ version_ctx.reg_mapping == ctx.reg_mapping
+ }).count()
+ })
.unwrap_or(0)
}
None => 0,
@@ -1433,10 +2218,7 @@ pub fn get_or_create_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> {
/// Retrieve a basic block version for an (iseq, idx) tuple
/// This will return None if no version is found
fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
- let versions = match get_version_list(blockid) {
- Some(versions) => versions,
- None => return None,
- };
+ let versions = get_version_list(blockid)?;
// Best match found
let mut best_version: Option<BlockRef> = None;
@@ -1445,10 +2227,11 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
// For each version matching the blockid
for blockref in versions.iter() {
let block = unsafe { blockref.as_ref() };
+ let block_ctx = Context::decode(block.ctx);
// Note that we always prefer the first matching
// version found because of inline-cache chains
- match ctx.diff(&block.ctx) {
+ match ctx.diff(&block_ctx) {
TypeDiff::Compatible(diff) if diff < best_diff => {
best_version = Some(*blockref);
best_diff = diff;
@@ -1460,6 +2243,35 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
return best_version;
}
+/// Find the closest RegMapping among ones that have already been compiled.
+pub fn find_most_compatible_reg_mapping(blockid: BlockId, ctx: &Context) -> Option<RegMapping> {
+ let versions = get_version_list(blockid)?;
+
+ // Best match found
+ let mut best_mapping: Option<RegMapping> = None;
+ let mut best_diff = usize::MAX;
+
+ // For each version matching the blockid
+ for blockref in versions.iter() {
+ let block = unsafe { blockref.as_ref() };
+ let block_ctx = Context::decode(block.ctx);
+
+ // Discover the best block that is compatible if we load/spill registers
+ match ctx.diff_allowing_reg_mismatch(&block_ctx) {
+ TypeDiff::Compatible(diff) if diff < best_diff => {
+ best_mapping = Some(block_ctx.get_reg_mapping());
+ best_diff = diff;
+ }
+ _ => {}
+ }
+ }
+
+ best_mapping
+}
+
+/// Allow inlining a Block up to MAX_INLINE_VERSIONS times.
+const MAX_INLINE_VERSIONS: usize = 1000;
+
/// Produce a generic context when the block version limit is hit for a blockid
pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
// Guard chains implement limits separately, do nothing
@@ -1467,21 +2279,41 @@ pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
return *ctx;
}
+ let next_versions = get_num_versions(blockid, ctx) + 1;
+ let max_versions = if ctx.inline() {
+ MAX_INLINE_VERSIONS
+ } else {
+ get_option!(max_versions)
+ };
+
// If this block version we're about to add will hit the version limit
- if get_num_versions(blockid) + 1 >= get_option!(max_versions) {
+ if next_versions >= max_versions {
// Produce a generic context that stores no type information,
// but still respects the stack_size and sp_offset constraints.
// This new context will then match all future requests.
let generic_ctx = ctx.get_generic_ctx();
- debug_assert_ne!(
- TypeDiff::Incompatible,
- ctx.diff(&generic_ctx),
- "should substitute a compatible context",
- );
+ if cfg!(debug_assertions) {
+ let mut ctx = ctx.clone();
+ if ctx.inline() {
+ // Suppress TypeDiff::Incompatible from ctx.diff(). We return TypeDiff::Incompatible
+ // to keep inlining blocks until we hit the limit, but it's safe to give up inlining.
+ ctx.inline_block = None;
+ assert!(generic_ctx.inline_block == None);
+ }
+
+ assert_ne!(
+ TypeDiff::Incompatible,
+ ctx.diff(&generic_ctx),
+ "should substitute a compatible context",
+ );
+ }
return generic_ctx;
}
+ if ctx.inline() {
+ incr_counter_to!(max_inline_versions, next_versions);
+ }
return *ctx;
}
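The limit policy above can be restated with hypothetical numbers (a sketch, not part of the patch): when the version about to be added would reach the applicable limit, the caller substitutes the generic context, and inlined contexts are measured against the much larger MAX_INLINE_VERSIONS rather than the max_versions option.

    fn main() {
        let max_versions = 4;           // stands in for get_option!(max_versions)
        let max_inline_versions = 1000; // mirrors MAX_INLINE_VERSIONS above

        // A non-inlined blockid that already has 3 compatible versions:
        let next_versions = 3 + 1;
        assert!(next_versions >= max_versions);       // fall back to the generic ctx

        // The same count judged against the inline limit stays specialized:
        assert!(next_versions < max_inline_versions);
    }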
@@ -1509,7 +2341,7 @@ unsafe fn add_block_version(blockref: BlockRef, cb: &CodeBlock) {
let block = unsafe { blockref.as_ref() };
// Function entry blocks must have stack size 0
- assert!(!(block.iseq_range.start == 0 && block.ctx.stack_size > 0));
+ debug_assert!(!(block.iseq_range.start == 0 && Context::decode(block.ctx).stack_size > 0));
let version_list = get_or_create_version_list(block.get_blockid());
@@ -1539,6 +2371,9 @@ unsafe fn add_block_version(blockref: BlockRef, cb: &CodeBlock) {
}
incr_counter!(compiled_block_count);
+ if Context::decode(block.ctx).inline() {
+ incr_counter!(inline_block_count);
+ }
// Mark code pages for code GC
let iseq_payload = get_iseq_payload(block.iseq.get()).unwrap();
@@ -1559,7 +2394,7 @@ fn remove_block_version(blockref: &BlockRef) {
version_list.retain(|other| blockref != other);
}
-impl JITState {
+impl<'a> JITState<'a> {
// Finish compiling and turn a jit state into a block
// note that the block is still not in shape.
pub fn into_block(self, end_insn_idx: IseqIdx, start_addr: CodePtr, end_addr: CodePtr, gc_obj_offsets: Vec<u32>) -> BlockRef {
@@ -1568,29 +2403,33 @@ impl JITState {
incr_counter_by!(num_gc_obj_refs, gc_obj_offsets.len());
+ let ctx = Context::encode(&self.get_starting_ctx());
+
// Make the new block
let block = MaybeUninit::new(Block {
start_addr,
iseq: Cell::new(self.get_iseq()),
iseq_range: self.get_starting_insn_idx()..end_insn_idx,
- ctx: self.get_starting_ctx(),
+ ctx,
end_addr: Cell::new(end_addr),
incoming: MutableBranchList(Cell::default()),
gc_obj_offsets: gc_obj_offsets.into_boxed_slice(),
entry_exit: self.get_block_entry_exit(),
cme_dependencies: self.method_lookup_assumptions.into_iter().map(Cell::new).collect(),
// Pending branches => actual branches
- outgoing: self.pending_outgoing.into_iter().map(|pending_out| {
+ outgoing: MutableBranchList(Cell::new(self.pending_outgoing.into_iter().map(|pending_out| {
let pending_out = Rc::try_unwrap(pending_out)
- .ok().expect("all PendingBranchRefs should be unique when ready to construct a Block");
+ .unwrap_or_else(|rc| panic!(
+ "PendingBranchRef should be unique when ready to construct a Block. \
+ strong={} weak={}", Rc::strong_count(&rc), Rc::weak_count(&rc)));
pending_out.into_branch(NonNull::new(blockref as *mut Block).expect("no null from Box"))
- }).collect()
+ }).collect()))
});
// Initialize it on the heap
// SAFETY: allocated with Box above
unsafe { ptr::write(blockref, block) };
- // Block is initialized now. Note that MaybeUnint<T> has the same layout as T.
+ // Block is initialized now. Note that MaybeUninit<T> has the same layout as T.
let blockref = NonNull::new(blockref as *mut Block).expect("no null from Box");
// Track all the assumptions the block makes as invariants
@@ -1607,6 +2446,12 @@ impl JITState {
if let Some(idlist) = self.stable_constant_names_assumption {
track_stable_constant_names_assumption(blockref, idlist);
}
+ for klass in self.no_singleton_class_assumptions {
+ track_no_singleton_class_assumption(blockref, klass);
+ }
+ if self.no_ep_escape {
+ track_no_ep_escape_assumption(blockref, self.iseq);
+ }
blockref
}
@@ -1623,10 +2468,10 @@ impl Block {
pub fn get_ctx_count(&self) -> usize {
let mut count = 1; // block.ctx
- for branch in self.outgoing.iter() {
+ self.outgoing.for_each(|branch| {
// SAFETY: &self implies it's initialized
count += unsafe { branch.as_ref() }.get_stub_count();
- }
+ });
count
}
@@ -1670,10 +2515,13 @@ impl Context {
let mut generic_ctx = Context::default();
generic_ctx.stack_size = self.stack_size;
generic_ctx.sp_offset = self.sp_offset;
- generic_ctx.reg_temps = self.reg_temps;
+ generic_ctx.reg_mapping = self.reg_mapping;
if self.is_return_landing() {
generic_ctx.set_as_return_landing();
}
+ if self.is_deferred() {
+ generic_ctx.mark_as_deferred();
+ }
generic_ctx
}
@@ -1695,56 +2543,78 @@ impl Context {
self.sp_offset = offset;
}
- pub fn get_reg_temps(&self) -> RegTemps {
- self.reg_temps
+ pub fn get_reg_mapping(&self) -> RegMapping {
+ self.reg_mapping
}
- pub fn set_reg_temps(&mut self, reg_temps: RegTemps) {
- self.reg_temps = reg_temps;
+ pub fn set_reg_mapping(&mut self, reg_mapping: RegMapping) {
+ self.reg_mapping = reg_mapping;
}
pub fn get_chain_depth(&self) -> u8 {
- self.chain_depth_return_landing & 0x7f
+ self.chain_depth
}
- pub fn reset_chain_depth(&mut self) {
- self.chain_depth_return_landing &= 0x80;
+ pub fn reset_chain_depth_and_defer(&mut self) {
+ self.chain_depth = 0;
+ self.is_deferred = false;
}
pub fn increment_chain_depth(&mut self) {
- if self.get_chain_depth() == 0x7f {
+ if self.get_chain_depth() == CHAIN_DEPTH_MAX {
panic!("max block version chain depth reached!");
}
- self.chain_depth_return_landing += 1;
+ self.chain_depth += 1;
}
pub fn set_as_return_landing(&mut self) {
- self.chain_depth_return_landing |= 0x80;
+ self.is_return_landing = true;
}
pub fn clear_return_landing(&mut self) {
- self.chain_depth_return_landing &= 0x7f;
+ self.is_return_landing = false;
}
pub fn is_return_landing(&self) -> bool {
- self.chain_depth_return_landing & 0x80 > 0
+ self.is_return_landing
+ }
+
+ pub fn mark_as_deferred(&mut self) {
+ self.is_deferred = true;
+ }
+
+ pub fn is_deferred(&self) -> bool {
+ self.is_deferred
}
/// Get an operand for the adjusted stack pointer address
- pub fn sp_opnd(&self, offset_bytes: isize) -> Opnd {
- let offset = ((self.sp_offset as isize) * (SIZEOF_VALUE as isize)) + offset_bytes;
- let offset = offset as i32;
+ pub fn sp_opnd(&self, offset: i32) -> Opnd {
+ let offset = (self.sp_offset as i32 + offset) * SIZEOF_VALUE_I32;
return Opnd::mem(64, SP, offset);
}
- /// Stop using a register for a given stack temp.
+ /// Get an operand for the adjusted environment pointer address using SP register.
+ /// This is valid only when a Binding object hasn't been created for the frame.
+ pub fn ep_opnd(&self, offset: i32) -> Opnd {
+ let ep_offset = self.get_stack_size() as i32 + 1;
+ self.sp_opnd(-ep_offset + offset)
+ }
+
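To make the pointer arithmetic in sp_opnd()/ep_opnd() concrete, here is a standalone check that mirrors it with plain integers (a sketch, not part of the patch; it assumes SIZEOF_VALUE is 8 bytes, as on a 64-bit build):

    fn main() {
        let sizeof_value: i32 = 8; // assumed size of VALUE
        let sp_offset: i32 = 2;    // stands in for ctx.sp_offset
        let stack_size: i32 = 2;   // stands in for ctx.stack_size

        // sp_opnd(offset): byte displacement used in the [SP + disp] operand
        let sp_opnd = |offset: i32| (sp_offset + offset) * sizeof_value;
        assert_eq!(sp_opnd(0), 16);  // one slot above the current top (next push)
        assert_eq!(sp_opnd(-1), 8);  // the value currently on top

        // ep_opnd(offset): EP is looked up stack_size + 1 slots below sp_opnd(0)
        let ep_opnd = |offset: i32| sp_opnd(-(stack_size + 1) + offset);
        assert_eq!(ep_opnd(0), -8);  // displacement of the frame's EP slot
    }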
+ /// Start using a register for a given stack temp or a local.
+ pub fn alloc_reg(&mut self, opnd: RegOpnd) {
+ let mut reg_mapping = self.get_reg_mapping();
+ if reg_mapping.alloc_reg(opnd) {
+ self.set_reg_mapping(reg_mapping);
+ }
+ }
+
+ /// Stop using a register for a given stack temp or a local.
/// This allows us to reuse the register for a value that we know is dead
/// and will no longer be used (e.g. popped stack temp).
- pub fn dealloc_temp_reg(&mut self, stack_idx: u8) {
- if stack_idx < MAX_REG_TEMPS {
- let mut reg_temps = self.get_reg_temps();
- reg_temps.set(stack_idx, false);
- self.set_reg_temps(reg_temps);
+ pub fn dealloc_reg(&mut self, opnd: RegOpnd) {
+ let mut reg_mapping = self.get_reg_mapping();
+ if reg_mapping.dealloc_reg(opnd) {
+ self.set_reg_mapping(reg_mapping);
}
}
@@ -1757,19 +2627,18 @@ impl Context {
let stack_idx: usize = (self.stack_size - 1 - idx).into();
// If outside of tracked range, do nothing
- if stack_idx >= MAX_TEMP_TYPES {
+ if stack_idx >= MAX_CTX_TEMPS {
return Type::Unknown;
}
let mapping = self.get_temp_mapping(stack_idx);
- match mapping.get_kind() {
+ match mapping {
MapToSelf => self.self_type,
- MapToStack => mapping.get_type(),
- MapToLocal => {
- let idx = mapping.get_local_idx();
- assert!((idx as usize) < MAX_LOCAL_TYPES);
- return self.get_local_type(idx.into());
+ MapToStack(temp_type) => temp_type,
+ MapToLocal(local_idx) => {
+ assert!((local_idx as usize) < MAX_CTX_LOCALS);
+ return self.get_local_type(local_idx.into());
}
}
}
@@ -1778,82 +2647,23 @@ impl Context {
/// Get the currently tracked type for a local variable
pub fn get_local_type(&self, local_idx: usize) -> Type {
- if local_idx >= MAX_LOCAL_TYPES {
- return Type::Unknown
+ if local_idx >= MAX_CTX_LOCALS {
+ Type::Unknown
} else {
- // Each type is stored in 4 bits
- let type_bits = (self.local_types >> (4 * local_idx)) & 0b1111;
- unsafe { transmute::<u8, Type>(type_bits as u8) }
+ self.local_types[local_idx]
}
}
/// Get the current temp mapping for a given stack slot
fn get_temp_mapping(&self, temp_idx: usize) -> TempMapping {
- assert!(temp_idx < MAX_TEMP_TYPES);
-
- // Extract the temp mapping kind
- let kind_bits = (self.temp_mapping_kind >> (2 * temp_idx)) & 0b11;
- let temp_kind = unsafe { transmute::<u8, TempMappingKind>(kind_bits as u8) };
-
- // Extract the payload bits (temp type or local idx)
- let payload_bits = (self.temp_payload >> (4 * temp_idx)) & 0b1111;
-
- match temp_kind {
- MapToSelf => TempMapping::map_to_self(),
-
- MapToStack => {
- TempMapping::map_to_stack(
- unsafe { transmute::<u8, Type>(payload_bits as u8) }
- )
- }
-
- MapToLocal => {
- TempMapping::map_to_local(
- payload_bits as u8
- )
- }
- }
+ assert!(temp_idx < MAX_CTX_TEMPS);
+ self.temp_mapping[temp_idx]
}
- /// Get the current temp mapping for a given stack slot
+ /// Set the current temp mapping for a given stack slot
fn set_temp_mapping(&mut self, temp_idx: usize, mapping: TempMapping) {
- assert!(temp_idx < MAX_TEMP_TYPES);
-
- // Extract the kind bits
- let mapping_kind = mapping.get_kind();
- let kind_bits = unsafe { transmute::<TempMappingKind, u8>(mapping_kind) };
- assert!(kind_bits <= 0b11);
-
- // Extract the payload bits
- let payload_bits = match mapping_kind {
- MapToSelf => 0,
-
- MapToStack => {
- let t = mapping.get_type();
- unsafe { transmute::<Type, u8>(t) }
- }
-
- MapToLocal => {
- mapping.get_local_idx()
- }
- };
- assert!(payload_bits <= 0b1111);
-
- // Update the kind bits
- {
- let mask_bits = 0b11_u16 << (2 * temp_idx);
- let shifted_bits = (kind_bits as u16) << (2 * temp_idx);
- let all_kind_bits = self.temp_mapping_kind as u16;
- self.temp_mapping_kind = (all_kind_bits & !mask_bits) | shifted_bits;
- }
-
- // Update the payload bits
- {
- let mask_bits = 0b1111_u32 << (4 * temp_idx);
- let shifted_bits = (payload_bits as u32) << (4 * temp_idx);
- let all_payload_bits = self.temp_payload as u32;
- self.temp_payload = (all_payload_bits & !mask_bits) | shifted_bits;
- }
+ assert!(temp_idx < MAX_CTX_TEMPS);
+ self.temp_mapping[temp_idx] = mapping;
}
/// Upgrade (or "learn") the type of an instruction operand
@@ -1873,25 +2683,27 @@ impl Context {
let stack_idx = (self.stack_size - 1 - idx) as usize;
// If outside of tracked range, do nothing
- if stack_idx >= MAX_TEMP_TYPES {
+ if stack_idx >= MAX_CTX_TEMPS {
return;
}
let mapping = self.get_temp_mapping(stack_idx);
- match mapping.get_kind() {
+ match mapping {
MapToSelf => self.self_type.upgrade(opnd_type),
- MapToStack => {
- let mut temp_type = mapping.get_type();
+ MapToStack(mut temp_type) => {
temp_type.upgrade(opnd_type);
- self.set_temp_mapping(stack_idx, TempMapping::map_to_stack(temp_type));
+ self.set_temp_mapping(stack_idx, TempMapping::MapToStack(temp_type));
}
- MapToLocal => {
- let idx = mapping.get_local_idx() as usize;
- assert!(idx < MAX_LOCAL_TYPES);
+ MapToLocal(local_idx) => {
+ let idx = local_idx as usize;
+ assert!(idx < MAX_CTX_LOCALS);
let mut new_type = self.get_local_type(idx);
new_type.upgrade(opnd_type);
self.set_local_type(idx, new_type);
+ // Re-attach MapToLocal for this StackOpnd(idx). set_local_type() detaches
+ // all MapToLocal mappings, including the one we're upgrading here.
+ self.set_opnd_mapping(opnd, mapping);
}
}
}
@@ -1907,18 +2719,18 @@ impl Context {
let opnd_type = self.get_opnd_type(opnd);
match opnd {
- SelfOpnd => TempMapping::map_to_self(),
+ SelfOpnd => TempMapping::MapToSelf,
StackOpnd(idx) => {
assert!(idx < self.stack_size);
let stack_idx = (self.stack_size - 1 - idx) as usize;
- if stack_idx < MAX_TEMP_TYPES {
+ if stack_idx < MAX_CTX_TEMPS {
self.get_temp_mapping(stack_idx)
} else {
// We can't know the source of this stack operand, so we assume it is
// a stack-only temporary. type will be UNKNOWN
assert!(opnd_type == Type::Unknown);
- TempMapping::map_to_stack(opnd_type)
+ TempMapping::MapToStack(opnd_type)
}
}
}
@@ -1938,7 +2750,7 @@ impl Context {
}
// If outside of tracked range, do nothing
- if stack_idx >= MAX_TEMP_TYPES {
+ if stack_idx >= MAX_CTX_TEMPS {
return;
}
@@ -1954,35 +2766,30 @@ impl Context {
return;
}
- if local_idx >= MAX_LOCAL_TYPES {
+ if local_idx >= MAX_CTX_LOCALS {
return
}
// If any values on the stack map to this local we must detach them
- for mapping_idx in 0..MAX_TEMP_TYPES {
+ for mapping_idx in 0..MAX_CTX_TEMPS {
let mapping = self.get_temp_mapping(mapping_idx);
- let tm = match mapping.get_kind() {
- MapToStack => mapping,
+ let tm = match mapping {
+ MapToStack(_) => mapping,
MapToSelf => mapping,
- MapToLocal => {
- let idx = mapping.get_local_idx();
+ MapToLocal(idx) => {
if idx as usize == local_idx {
let local_type = self.get_local_type(local_idx);
- TempMapping::map_to_stack(local_type)
+ TempMapping::MapToStack(local_type)
} else {
- TempMapping::map_to_local(idx)
+ TempMapping::MapToLocal(idx)
}
}
};
self.set_temp_mapping(mapping_idx, tm);
}
- // Update the type bits
- let type_bits = local_type as u32;
- assert!(type_bits <= 0b1111);
- let mask_bits = 0b1111_u32 << (4 * local_idx);
- let shifted_bits = type_bits << (4 * local_idx);
- self.local_types = (self.local_types & !mask_bits) | shifted_bits;
+ // Update the type
+ self.local_types[local_idx] = local_type;
}
/// Erase local variable type information
@@ -1991,16 +2798,26 @@ impl Context {
// When clearing local types we must detach any stack mappings to those
// locals. Even if local values may have changed, stack values will not.
- for mapping_idx in 0..MAX_TEMP_TYPES {
+ for mapping_idx in 0..MAX_CTX_TEMPS {
let mapping = self.get_temp_mapping(mapping_idx);
- if mapping.get_kind() == MapToLocal {
- let local_idx = mapping.get_local_idx() as usize;
- self.set_temp_mapping(mapping_idx, TempMapping::map_to_stack(self.get_local_type(local_idx)));
+ if let MapToLocal(local_idx) = mapping {
+ let local_idx = local_idx as usize;
+ self.set_temp_mapping(mapping_idx, TempMapping::MapToStack(self.get_local_type(local_idx)));
}
}
// Clear the local types
- self.local_types = 0;
+ self.local_types = [Type::default(); MAX_CTX_LOCALS];
+ }
+
+ /// Return true if the code is inlined by the caller
+ pub fn inline(&self) -> bool {
+ self.inline_block.is_some()
+ }
+
+ /// Set the ISEQ of the block argument given to the Block of this Context
+ pub fn set_inline_block(&mut self, iseq: IseqPtr) {
+ self.inline_block = Some(iseq);
}
/// Compute a difference score for two context objects
@@ -2023,6 +2840,10 @@ impl Context {
return TypeDiff::Incompatible;
}
+ if src.is_deferred() != dst.is_deferred() {
+ return TypeDiff::Incompatible;
+ }
+
if dst.stack_size != src.stack_size {
return TypeDiff::Incompatible;
}
@@ -2031,7 +2852,7 @@ impl Context {
return TypeDiff::Incompatible;
}
- if dst.reg_temps != src.reg_temps {
+ if dst.reg_mapping != src.reg_mapping {
return TypeDiff::Incompatible;
}
@@ -2044,8 +2865,15 @@ impl Context {
TypeDiff::Incompatible => return TypeDiff::Incompatible,
};
+ // Check the block to inline
+ if src.inline_block != dst.inline_block {
+ // find_block_version should not find existing blocks with different
+ // inline_block so that their yield will not be megamorphic.
+ return TypeDiff::Incompatible;
+ }
+
// For each local type we track
- for i in 0.. MAX_LOCAL_TYPES {
+ for i in 0.. MAX_CTX_LOCALS {
let t_src = src.get_local_type(i);
let t_dst = dst.get_local_type(i);
diff += match t_src.diff(t_dst) {
@@ -2061,7 +2889,7 @@ impl Context {
// If the two mappings aren't the same
if src_mapping != dst_mapping {
- if dst_mapping.get_kind() == MapToStack {
+ if matches!(dst_mapping, MapToStack(_)) {
// We can safely drop information about the source of the temp
// stack operand.
diff += 1;
@@ -2082,8 +2910,31 @@ impl Context {
return TypeDiff::Compatible(diff);
}
+ /// Basically diff() but allows RegMapping incompatibility that could be fixed by
+ /// spilling, loading, or shuffling registers.
+ pub fn diff_allowing_reg_mismatch(&self, dst: &Context) -> TypeDiff {
+ // We shuffle only RegOpnd::Local and spill any other RegOpnd::Stack.
+ // If dst has RegOpnd::Stack, we can't reuse the block as a callee.
+ for reg_opnd in dst.get_reg_mapping().get_reg_opnds() {
+ if matches!(reg_opnd, RegOpnd::Stack(_)) {
+ return TypeDiff::Incompatible;
+ }
+ }
+
+ // Prepare a Context with the same registers
+ let mut dst_with_same_regs = dst.clone();
+ dst_with_same_regs.set_reg_mapping(self.get_reg_mapping());
+
+ // Diff registers and other stuff separately, and merge them
+ if let TypeDiff::Compatible(ctx_diff) = self.diff(&dst_with_same_regs) {
+ TypeDiff::Compatible(ctx_diff + self.get_reg_mapping().diff(dst.get_reg_mapping()))
+ } else {
+ TypeDiff::Incompatible
+ }
+ }
+
pub fn two_fixnums_on_stack(&self, jit: &mut JITState) -> Option<bool> {
- if jit.at_current_insn() {
+ if jit.at_compile_target() {
let comptime_recv = jit.peek_at_stack(self, 1);
let comptime_arg = jit.peek_at_stack(self, 0);
return Some(comptime_recv.fixnum_p() && comptime_arg.fixnum_p());
@@ -2111,44 +2962,42 @@ impl Assembler {
let stack_size: usize = self.ctx.stack_size.into();
// Keep track of the type and mapping of the value
- if stack_size < MAX_TEMP_TYPES {
+ if stack_size < MAX_CTX_TEMPS {
self.ctx.set_temp_mapping(stack_size, mapping);
- if mapping.get_kind() == MapToLocal {
- let idx = mapping.get_local_idx();
- assert!((idx as usize) < MAX_LOCAL_TYPES);
+ if let MapToLocal(local_idx) = mapping {
+ assert!((local_idx as usize) < MAX_CTX_LOCALS);
}
}
- // Allocate a register to the stack operand
- if self.ctx.stack_size < MAX_REG_TEMPS {
- self.alloc_temp_reg(self.ctx.stack_size);
- }
-
self.ctx.stack_size += 1;
self.ctx.sp_offset += 1;
- return self.stack_opnd(0);
+ // Allocate a register to the new stack operand
+ let stack_opnd = self.stack_opnd(0);
+ self.alloc_reg(stack_opnd.reg_opnd());
+
+ stack_opnd
}
/// Push one new value on the temp stack
/// Return a pointer to the new stack top
pub fn stack_push(&mut self, val_type: Type) -> Opnd {
- return self.stack_push_mapping(TempMapping::map_to_stack(val_type));
+ return self.stack_push_mapping(TempMapping::MapToStack(val_type));
}
/// Push the self value on the stack
pub fn stack_push_self(&mut self) -> Opnd {
- return self.stack_push_mapping(TempMapping::map_to_self());
+ return self.stack_push_mapping(TempMapping::MapToSelf);
}
/// Push a local variable on the stack
pub fn stack_push_local(&mut self, local_idx: usize) -> Opnd {
- if local_idx >= MAX_LOCAL_TYPES {
+ if local_idx >= MAX_CTX_LOCALS {
return self.stack_push(Type::Unknown);
}
- return self.stack_push_mapping(TempMapping::map_to_local(local_idx as u8));
+ return self.stack_push_mapping(TempMapping::MapToLocal(local_idx as u8));
}
// Pop N values off the stack
@@ -2162,8 +3011,8 @@ impl Assembler {
for i in 0..n {
let idx: usize = (self.ctx.stack_size as usize) - i - 1;
- if idx < MAX_TEMP_TYPES {
- self.ctx.set_temp_mapping(idx, TempMapping::map_to_stack(Type::Unknown));
+ if idx < MAX_CTX_TEMPS {
+ self.ctx.set_temp_mapping(idx, TempMapping::MapToStack(Type::Unknown));
}
}
@@ -2180,11 +3029,11 @@ impl Assembler {
let method_name_index = (self.ctx.stack_size as usize) - argc - 1;
for i in method_name_index..(self.ctx.stack_size - 1) as usize {
- if i < MAX_TEMP_TYPES {
- let next_arg_mapping = if i + 1 < MAX_TEMP_TYPES {
+ if i < MAX_CTX_TEMPS {
+ let next_arg_mapping = if i + 1 < MAX_CTX_TEMPS {
self.ctx.get_temp_mapping(i + 1)
} else {
- TempMapping::map_to_stack(Type::Unknown)
+ TempMapping::MapToStack(Type::Unknown)
};
self.ctx.set_temp_mapping(i, next_arg_mapping);
}
@@ -2198,8 +3047,22 @@ impl Assembler {
idx,
num_bits: 64,
stack_size: self.ctx.stack_size,
+ num_locals: None, // not needed for stack temps
+ sp_offset: self.ctx.sp_offset,
+ reg_mapping: None, // push_insn will set this
+ }
+ }
+
+ /// Get an operand pointing to a local variable
+ pub fn local_opnd(&self, ep_offset: u32) -> Opnd {
+ let idx = self.ctx.stack_size as i32 + ep_offset as i32;
+ Opnd::Stack {
+ idx,
+ num_bits: 64,
+ stack_size: self.ctx.stack_size,
+ num_locals: Some(self.get_num_locals().unwrap()), // this must exist for locals
sp_offset: self.ctx.sp_offset,
- reg_temps: None, // push_insn will set this
+ reg_mapping: None, // push_insn will set this
}
}
}
@@ -2243,7 +3106,7 @@ fn gen_block_series_body(
let mut batch = Vec::with_capacity(EXPECTED_BATCH_SIZE);
// Generate code for the first block
- let first_block = gen_single_block(blockid, start_ctx, ec, cb, ocb).ok()?;
+ let first_block = gen_single_block(blockid, start_ctx, ec, cb, ocb, true).ok()?;
batch.push(first_block); // Keep track of this block version
// Add the block version to the VersionMap for this ISEQ
@@ -2253,9 +3116,10 @@ fn gen_block_series_body(
let mut last_blockref = first_block;
loop {
// Get the last outgoing branch from the previous block.
- let last_branchref = {
- let last_block = unsafe { last_blockref.as_ref() };
- match last_block.outgoing.last() {
+ // SAFETY: No cell mutation inside unsafe. Copying out a BranchRef.
+ let last_branchref: BranchRef = unsafe {
+ let last_block = last_blockref.as_ref();
+ match last_block.outgoing.0.ref_unchecked().last() {
Some(branch) => *branch,
None => {
break;
@@ -2282,7 +3146,8 @@ fn gen_block_series_body(
};
// Generate new block using context from the last branch.
- let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb);
+ let requested_ctx = Context::decode(requested_ctx);
+ let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb, false);
// If the block failed to compile
if result.is_err() {
@@ -2335,7 +3200,7 @@ fn gen_block_series_body(
/// Generate a block version that is an entry point inserted into an iseq
/// NOTE: this function assumes that the VM lock has been taken
/// If jit_exception is true, compile JIT code for handling exceptions.
-/// See [jit_compile_exception] for details.
+/// See jit_compile_exception() for details.
pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> Option<*const u8> {
// Compute the current instruction index based on the current PC
let cfp = unsafe { get_ec_cfp(ec) };
@@ -2357,16 +3222,33 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> Option<
let cb = CodegenGlobals::get_inline_cb();
let ocb = CodegenGlobals::get_outlined_cb();
+ let code_ptr = gen_entry_point_body(blockid, stack_size, ec, jit_exception, cb, ocb);
+
+ cb.mark_all_executable();
+ ocb.unwrap().mark_all_executable();
+
+ code_ptr
+}
+
+fn gen_entry_point_body(blockid: BlockId, stack_size: u8, ec: EcPtr, jit_exception: bool, cb: &mut CodeBlock, ocb: &mut OutlinedCb) -> Option<*const u8> {
// Write the interpreter entry prologue. Might be NULL when out of memory.
- let code_ptr = gen_entry_prologue(cb, ocb, iseq, insn_idx, jit_exception);
+ let (code_ptr, reg_mapping) = gen_entry_prologue(cb, ocb, blockid, stack_size, jit_exception)?;
- // Try to generate code for the entry block
+ // Find or compile a block version
let mut ctx = Context::default();
ctx.stack_size = stack_size;
- let block = gen_block_series(blockid, &ctx, ec, cb, ocb);
-
- cb.mark_all_executable();
- ocb.unwrap().mark_all_executable();
+ ctx.reg_mapping = reg_mapping;
+ let block = match find_block_version(blockid, &ctx) {
+ // If an existing block is found, generate a jump to the block.
+ Some(blockref) => {
+ let mut asm = Assembler::new_without_iseq();
+ asm.jmp(unsafe { blockref.as_ref() }.start_addr.into());
+ asm.compile(cb, Some(ocb))?;
+ Some(blockref)
+ }
+ // If this block hasn't yet been compiled, generate blocks after the entry guard.
+ None => gen_block_series(blockid, &ctx, ec, cb, ocb),
+ };
match block {
// Compilation failed
@@ -2391,12 +3273,12 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> Option<
incr_counter!(compiled_iseq_entry);
// Compilation successful and block not empty
- code_ptr.map(|ptr| ptr.raw_ptr(cb))
+ Some(code_ptr.raw_ptr(cb))
}
// Change the entry's jump target from an entry stub to a next entry
pub fn regenerate_entry(cb: &mut CodeBlock, entryref: &EntryRef, next_entry: CodePtr) {
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new_without_iseq();
asm_comment!(asm, "regenerate_entry");
// gen_entry_guard generates cmp + jne. We're rewriting only jne.
@@ -2429,7 +3311,7 @@ pub fn new_pending_entry() -> PendingEntryRef {
c_callable! {
/// Generated code calls this function with the SysV calling convention.
- /// See [gen_call_entry_stub_hit].
+ /// See [gen_entry_stub].
fn entry_stub_hit(entry_ptr: *const c_void, ec: EcPtr) -> *const u8 {
with_compile_time(|| {
with_vm_lock(src_loc!(), || {
@@ -2466,24 +3348,26 @@ fn entry_stub_hit_body(
let cfp = unsafe { get_ec_cfp(ec) };
let iseq = unsafe { get_cfp_iseq(cfp) };
let insn_idx = iseq_pc_to_insn_idx(iseq, unsafe { get_cfp_pc(cfp) })?;
+ let blockid = BlockId { iseq, idx: insn_idx };
let stack_size: u8 = unsafe {
u8::try_from(get_cfp_sp(cfp).offset_from(get_cfp_bp(cfp))).ok()?
};
// Compile a new entry guard as a next entry
let next_entry = cb.get_write_ptr();
- let mut asm = Assembler::new();
- let pending_entry = gen_entry_chain_guard(&mut asm, ocb, iseq, insn_idx)?;
+ let mut asm = Assembler::new(unsafe { get_iseq_body_local_table_size(iseq) });
+ let pending_entry = gen_entry_chain_guard(&mut asm, ocb, blockid)?;
+ let reg_mapping = gen_entry_reg_mapping(&mut asm, blockid, stack_size);
asm.compile(cb, Some(ocb))?;
// Find or compile a block version
- let blockid = BlockId { iseq, idx: insn_idx };
let mut ctx = Context::default();
ctx.stack_size = stack_size;
+ ctx.reg_mapping = reg_mapping;
let blockref = match find_block_version(blockid, &ctx) {
// If an existing block is found, generate a jump to the block.
Some(blockref) => {
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new_without_iseq();
asm.jmp(unsafe { blockref.as_ref() }.start_addr.into());
asm.compile(cb, Some(ocb))?;
Some(blockref)
@@ -2503,15 +3387,16 @@ fn entry_stub_hit_body(
get_or_create_iseq_payload(iseq).entries.push(pending_entry.into_entry());
}
- // Let the stub jump to the block
- blockref.map(|block| unsafe { block.as_ref() }.start_addr.raw_ptr(cb))
+ // Return a code pointer if the block is successfully compiled. The entry stub needs
+ // to jump to the entry preceding the block to load the registers in reg_mapping.
+ blockref.map(|_block| next_entry.raw_ptr(cb))
}
/// Generate a stub that calls entry_stub_hit
pub fn gen_entry_stub(entry_address: usize, ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new_without_iseq();
asm_comment!(asm, "entry stub hit");
asm.mov(C_ARG_OPNDS[0], entry_address.into());
@@ -2527,7 +3412,7 @@ pub fn gen_entry_stub(entry_address: usize, ocb: &mut OutlinedCb) -> Option<Code
/// it's useful for Code GC to call entry_stub_hit from a globally shared code.
pub fn gen_entry_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new_without_iseq();
// See gen_entry_guard for how it's used.
asm_comment!(asm, "entry_stub_hit() trampoline");
@@ -2545,12 +3430,12 @@ fn regenerate_branch(cb: &mut CodeBlock, branch: &Branch) {
cb.remove_comments(branch.start_addr, branch.end_addr.get());
// SAFETY: having a &Branch implies branch.block is initialized.
- let block = unsafe { branch.block.as_ref() };
+ let block = unsafe { branch.block.get().as_ref() };
let branch_terminates_block = branch.end_addr.get() == block.get_end_addr();
// Generate the branch
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new_without_iseq();
asm_comment!(asm, "regenerate_branch");
branch.gen_fn.call(
&mut asm,
@@ -2558,6 +3443,12 @@ fn regenerate_branch(cb: &mut CodeBlock, branch: &Branch) {
branch.get_target_address(1).map(|addr| Target::CodePtr(addr)),
);
+ // If the entire block is the branch and the block could be invalidated,
+ // we need to pad to ensure there is room for invalidation patching.
+ if branch.start_addr == block.start_addr && branch_terminates_block && block.entry_exit.is_some() {
+ asm.pad_inval_patch();
+ }
+
// Rewrite the branch
let old_write_pos = cb.get_write_pos();
let old_dropped_bytes = cb.has_dropped_bytes();
@@ -2605,8 +3496,6 @@ fn new_pending_branch(jit: &mut JITState, gen_fn: BranchGenFn) -> PendingBranchR
targets: [Cell::new(None), Cell::new(None)],
});
- incr_counter!(compiled_branch_count); // TODO not true. count at finalize time
-
// Add to the list of outgoing branches for the block
jit.queue_outgoing_branch(branch.clone());
@@ -2639,9 +3528,11 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
// SAFETY: We have the VM lock, and the branch is initialized by the time generated
// code calls this function.
+ //
+ // Careful, don't make a `&Block` from `branch.block` here because we might
+ // delete it later in delete_empty_defer_block().
let branch = unsafe { branch_ref.as_ref() };
let branch_size_on_entry = branch.code_size();
- let housing_block = unsafe { branch.block.as_ref() };
let target_idx: usize = target_idx.as_usize();
let target_branch_shape = match target_idx {
@@ -2663,7 +3554,8 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
return target.get_address().unwrap().raw_ptr(cb);
}
- (target.get_blockid(), target.get_ctx())
+ let target_ctx = Context::decode(target.get_ctx());
+ (target.get_blockid(), target_ctx)
};
let (cfp, original_interp_sp) = unsafe {
@@ -2671,14 +3563,14 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
let original_interp_sp = get_cfp_sp(cfp);
let running_iseq = get_cfp_iseq(cfp);
+ assert_eq!(running_iseq, target_blockid.iseq as _, "each stub expects a particular iseq");
+
let reconned_pc = rb_iseq_pc_at_idx(running_iseq, target_blockid.idx.into());
let reconned_sp = original_interp_sp.offset(target_ctx.sp_offset.into());
// Unlike in the interpreter, our `leave` doesn't write to the caller's
// SP -- we do it in the returned-to code. Account for this difference.
let reconned_sp = reconned_sp.add(target_ctx.is_return_landing().into());
- assert_eq!(running_iseq, target_blockid.iseq as _, "each stub expects a particular iseq");
-
// Update the PC in the current CFP, because it may be out of sync in JITted code
rb_set_cfp_pc(cfp, reconned_pc);
@@ -2701,6 +3593,13 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
return CodegenGlobals::get_stub_exit_code().raw_ptr(cb);
}
+ // Bail if this branch is housed in an invalidated (dead) block.
+ // This only happens in rare invalidation scenarios and we need
+ // to avoid linking a dead block to a live block with a branch.
+ if branch.block.get().as_ref().iseq.get().is_null() {
+ return CodegenGlobals::get_stub_exit_code().raw_ptr(cb);
+ }
+
(cfp, original_interp_sp)
};
@@ -2714,7 +3613,7 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
// If the new block can be generated right after the branch (at cb->write_pos)
if cb.get_write_ptr() == branch.end_addr.get() {
// This branch should be terminating its block
- assert!(branch.end_addr == housing_block.end_addr);
+ assert!(branch.end_addr == unsafe { branch.block.get().as_ref() }.end_addr);
// Change the branch shape to indicate the target block will be placed next
branch.gen_fn.set_shape(target_branch_shape);
@@ -2748,6 +3647,9 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
// Branch shape should reflect layout
assert!(!(branch.gen_fn.get_shape() == target_branch_shape && new_block.start_addr != branch.end_addr.get()));
+ // When block housing this branch is empty, try to free it
+ delete_empty_defer_block(branch, new_block, target_ctx, target_blockid);
+
// Add this branch to the list of incoming branches for the target
new_block.push_incoming(branch_ref);
@@ -2797,19 +3699,68 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
dst_addr.raw_ptr(cb)
}
+/// Part of branch_stub_hit().
+/// If we've hit a deferred branch, and the housing block consists solely of the branch, rewire
+/// incoming branches to the new block and delete the housing block.
+fn delete_empty_defer_block(branch: &Branch, new_block: &Block, target_ctx: Context, target_blockid: BlockId)
+{
+ // This &Block should be unique, relying on the VM lock
+ let housing_block: &Block = unsafe { branch.block.get().as_ref() };
+ if target_ctx.is_deferred() &&
+ target_blockid == housing_block.get_blockid() &&
+ housing_block.outgoing.len() == 1 &&
+ {
+ // The block is empty when iseq_range is one instruction long.
+ let range = &housing_block.iseq_range;
+ let iseq = housing_block.iseq.get();
+ let start_opcode = iseq_opcode_at_idx(iseq, range.start.into()) as usize;
+ let empty_end = range.start + insn_len(start_opcode) as IseqIdx;
+ range.end == empty_end
+ }
+ {
+ // Divert incoming branches of housing_block to the new block
+ housing_block.incoming.for_each(|incoming| {
+ let incoming = unsafe { incoming.as_ref() };
+ for target in 0..incoming.targets.len() {
+ // SAFETY: No cell mutation; copying out a BlockRef.
+ if Some(BlockRef::from(housing_block)) == unsafe {
+ incoming.targets[target]
+ .ref_unchecked()
+ .as_ref()
+ .and_then(|target| target.get_block())
+ } {
+ incoming.targets[target].set(Some(Box::new(BranchTarget::Block(new_block.into()))));
+ }
+ }
+ new_block.push_incoming(incoming.into());
+ });
+
+ // Transplant the branch we've just hit to the new block
+ mem::drop(housing_block.outgoing.0.take());
+ new_block.outgoing.push(branch.into());
+ let housing_block: BlockRef = branch.block.replace(new_block.into());
+ // Free the old housing block; there should now be no live &Block.
+ remove_block_version(&housing_block);
+ unsafe { free_block(housing_block, false) };
+
+ incr_counter!(deleted_defer_block_count);
+ }
+}
+
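The emptiness test inside delete_empty_defer_block() can be checked in isolation (a sketch with hypothetical numbers, not part of the patch): the housing block counts as empty when its iseq_range spans exactly one instruction.

    fn main() {
        let range = 10u16..13u16;     // hypothetical iseq_range of the block
        let insn_len_at_start = 3u16; // insn_len() of the opcode at index 10
        let empty_end = range.start + insn_len_at_start;
        assert_eq!(range.end, empty_end); // one instruction long, so "empty"
    }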
/// Generate a "stub", a piece of code that calls the compiler back when run.
/// A piece of code that redeems for more code; a thunk for code.
fn gen_branch_stub(
- ctx: &Context,
+ ctx: u32,
+ iseq: IseqPtr,
ocb: &mut OutlinedCb,
branch_struct_address: usize,
target_idx: u32,
) -> Option<CodePtr> {
let ocb = ocb.unwrap();
- let mut asm = Assembler::new();
- asm.ctx = *ctx;
- asm.set_reg_temps(ctx.reg_temps);
+ let mut asm = Assembler::new(unsafe { get_iseq_body_local_table_size(iseq) });
+ asm.ctx = Context::decode(ctx);
+ asm.set_reg_mapping(asm.ctx.reg_mapping);
asm_comment!(asm, "branch stub hit");
if asm.ctx.is_return_landing() {
@@ -2825,7 +3776,7 @@ fn gen_branch_stub(
}
// Spill temps to the VM stack as well for jit.peek_at_stack()
- asm.spill_temps();
+ asm.spill_regs();
// Set up the arguments unique to this stub for:
//
@@ -2845,10 +3796,10 @@ fn gen_branch_stub(
pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new_without_iseq();
// For `branch_stub_hit(branch_ptr, target_idx, ec)`,
- // `branch_ptr` and `target_idx` is different for each stub,
+ // `branch_ptr` and `target_idx` are different for each stub,
// but the call and what's after is the same. This trampoline
// is the unchanging part.
// Since this trampoline is static, it allows code GC inside
@@ -2881,7 +3832,7 @@ pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> {
}
/// Return registers to be pushed and popped on branch_stub_hit.
-fn caller_saved_temp_regs() -> impl Iterator<Item = &'static Reg> + DoubleEndedIterator {
+pub fn caller_saved_temp_regs() -> impl Iterator<Item = &'static Reg> + DoubleEndedIterator {
let temp_regs = Assembler::get_temp_regs().iter();
let len = temp_regs.len();
// The return value gen_leave() leaves in C_RET_REG
@@ -2946,26 +3897,26 @@ impl Assembler
}
}
+#[must_use]
pub fn gen_branch(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
target0: BlockId,
ctx0: &Context,
target1: Option<BlockId>,
ctx1: Option<&Context>,
gen_fn: BranchGenFn,
-) {
+) -> Option<()> {
let branch = new_pending_branch(jit, gen_fn);
// Get the branch targets or stubs
- let target0_addr = branch.set_target(0, target0, ctx0, ocb);
+ let target0_addr = branch.set_target(0, target0, ctx0, jit)?;
let target1_addr = if let Some(ctx) = ctx1 {
- let addr = branch.set_target(1, target1.unwrap(), ctx, ocb);
+ let addr = branch.set_target(1, target1.unwrap(), ctx, jit);
if addr.is_none() {
// target1 requested but we're out of memory.
// Avoid unwrap() in gen_fn()
- return;
+ return None;
}
addr
@@ -2973,10 +3924,10 @@ pub fn gen_branch(
// Call the branch generation function
asm.mark_branch_start(&branch);
- if let Some(dst_addr) = target0_addr {
- branch.gen_fn.call(asm, Target::CodePtr(dst_addr), target1_addr.map(|addr| Target::CodePtr(addr)));
- }
+ branch.gen_fn.call(asm, Target::CodePtr(target0_addr), target1_addr.map(|addr| Target::CodePtr(addr)));
asm.mark_branch_end(&branch);
+
+ Some(())
}
pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm: &mut Assembler) {
@@ -3006,7 +3957,7 @@ pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm:
// compile the target block right after this one (fallthrough).
BranchTarget::Stub(Box::new(BranchStub {
address: None,
- ctx: *ctx,
+ ctx: Context::encode(ctx),
iseq: Cell::new(target0.iseq),
iseq_idx: target0.idx,
}))
@@ -3016,18 +3967,14 @@ pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm:
}
/// Create a stub to force the code up to this point to be executed
-pub fn defer_compilation(
- jit: &mut JITState,
- asm: &mut Assembler,
- ocb: &mut OutlinedCb,
-) {
- if asm.ctx.get_chain_depth() != 0 {
+pub fn defer_compilation(jit: &mut JITState, asm: &mut Assembler) -> Result<(), ()> {
+ if asm.ctx.is_deferred() {
panic!("Double defer!");
}
let mut next_ctx = asm.ctx;
- next_ctx.increment_chain_depth();
+ next_ctx.mark_as_deferred();
let branch = new_pending_branch(jit, BranchGenFn::JumpToTarget0(Cell::new(BranchShape::Default)));
@@ -3036,15 +3983,19 @@ pub fn defer_compilation(
idx: jit.get_insn_idx(),
};
- // Likely a stub due to the increased chain depth
- let target0_address = branch.set_target(0, blockid, &next_ctx, ocb);
+ // Likely a stub since the context is marked as deferred().
+ let dst_addr = branch.set_target(0, blockid, &next_ctx, jit).ok_or(())?;
+
+ // Pad the block if it has the potential to be invalidated. This must be
+ // done before gen_fn() in case the jump is overwritten by a fallthrough.
+ if jit.block_entry_exit.is_some() {
+ asm.pad_inval_patch();
+ }
// Call the branch generation function
asm_comment!(asm, "defer_compilation");
asm.mark_branch_start(&branch);
- if let Some(dst_addr) = target0_address {
- branch.gen_fn.call(asm, Target::CodePtr(dst_addr), None);
- }
+ branch.gen_fn.call(asm, Target::CodePtr(dst_addr), None);
asm.mark_branch_end(&branch);
// If the block we're deferring from is empty
@@ -3053,6 +4004,8 @@ pub fn defer_compilation(
}
incr_counter!(defer_count);
+
+ Ok(())
}
/// Remove a block from the live control flow graph.
@@ -3085,7 +4038,7 @@ unsafe fn remove_from_graph(blockref: BlockRef) {
}
// For each outgoing branch
- for out_branchref in block.outgoing.iter() {
+ block.outgoing.for_each(|out_branchref| {
let out_branch = unsafe { out_branchref.as_ref() };
// For each successor block
for out_target in out_branch.targets.iter() {
@@ -3101,16 +4054,16 @@ unsafe fn remove_from_graph(blockref: BlockRef) {
// Temporarily move out of succ_block.incoming.
let succ_incoming = succ_block.incoming.0.take();
let mut succ_incoming = succ_incoming.into_vec();
- succ_incoming.retain(|branch| branch != out_branchref);
+ succ_incoming.retain(|branch| *branch != out_branchref);
succ_block.incoming.0.set(succ_incoming.into_boxed_slice()); // allocs. Rely on oom=abort
}
}
- }
+ });
}
/// Tear down a block and deallocate it.
/// Caller has to ensure that the code tracked by the block is not
-/// running, as running code may hit [branch_stub_hit] who exepcts
+/// running, as running code may hit [branch_stub_hit] who expects
/// [Branch] to be live.
///
/// We currently ensure this through the `jit_cont` system in cont.c
@@ -3139,7 +4092,7 @@ pub unsafe fn free_block(blockref: BlockRef, graph_intact: bool) {
/// Caller must ensure that we have unique ownership for the referent block
unsafe fn dealloc_block(blockref: BlockRef) {
unsafe {
- for outgoing in blockref.as_ref().outgoing.iter() {
+ for outgoing in blockref.as_ref().outgoing.0.take().iter() {
// this Box::from_raw matches the Box::into_raw from PendingBranch::into_branch
mem::drop(Box::from_raw(outgoing.as_ptr()));
}
@@ -3215,16 +4168,17 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
let cur_dropped_bytes = cb.has_dropped_bytes();
cb.set_write_ptr(block_start);
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new_without_iseq();
asm.jmp(block_entry_exit.as_side_exit());
cb.set_dropped_bytes(false);
asm.compile(&mut cb, Some(ocb)).expect("can rewrite existing code");
assert!(
cb.get_write_ptr() <= block_end,
- "invalidation wrote past end of block (code_size: {:?}, new_size: {})",
+ "invalidation wrote past end of block (code_size: {:?}, new_size: {}, start_addr: {:?})",
block.code_size(),
cb.get_write_ptr().as_offset() - block_start.as_offset(),
+ block.start_addr.raw_ptr(cb),
);
cb.set_write_ptr(cur_pos);
cb.set_dropped_bytes(cur_dropped_bytes);
@@ -3232,7 +4186,23 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
}
// For each incoming branch
- for branchref in block.incoming.0.take().iter() {
+ let mut incoming_branches = block.incoming.0.take();
+
+ // An adjacent branch will write into the start of the block being invalidated, possibly
+ // overwriting the block's exit. If we run out of memory after doing this, any subsequent
+ // incoming branches we rewrite won't be able to use the block's exit as a fallback when they
+ // are unable to generate a stub. To avoid this, if there's an incoming branch that's
+ // adjacent to the invalidated block, make sure we process it last.
+ let adjacent_branch_idx = incoming_branches.iter().position(|branchref| {
+ let branch = unsafe { branchref.as_ref() };
+ let target_next = block.start_addr == branch.end_addr.get();
+ target_next
+ });
+ if let Some(adjacent_branch_idx) = adjacent_branch_idx {
+ incoming_branches.swap(adjacent_branch_idx, incoming_branches.len() - 1)
+ }
+
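// Standalone sketch (not YJIT code) of the reordering step above: locate the
// one element that must be handled last and swap it to the back, so every
// other element is processed while the fallback it would overwrite still exists.
fn process_adjacent_last(mut branches: Vec<&'static str>) -> Vec<&'static str> {
    if let Some(idx) = branches.iter().position(|b| *b == "adjacent") {
        let last = branches.len() - 1;
        branches.swap(idx, last);
    }
    branches // iterate in this order: everything else first, "adjacent" last
}

fn main() {
    assert_eq!(
        process_adjacent_last(vec!["a", "adjacent", "b"]),
        vec!["a", "b", "adjacent"]
    );
}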
+ for (i, branchref) in incoming_branches.iter().enumerate() {
let branch = unsafe { branchref.as_ref() };
let target_idx = if branch.get_target_address(0) == Some(block_start) {
0
@@ -3251,7 +4221,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
}
// Create a stub for this branch target
- let stub_addr = gen_branch_stub(&block.ctx, ocb, branchref.as_ptr() as usize, target_idx as u32);
+ let stub_addr = gen_branch_stub(block.ctx, block.iseq.get(), ocb, branchref.as_ptr() as usize, target_idx as u32);
// In case we were unable to generate a stub (e.g. OOM). Use the block's
// exit instead of a stub for the block. It's important that we
@@ -3272,10 +4242,18 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
let target_next = block.start_addr == branch.end_addr.get();
if target_next {
- // The new block will no longer be adjacent.
- // Note that we could be enlarging the branch and writing into the
- // start of the block being invalidated.
- branch.gen_fn.set_shape(BranchShape::Default);
+ if stub_addr != block.start_addr {
+ // The new block will no longer be adjacent.
+ // Note that we could be enlarging the branch and writing into the
+ // start of the block being invalidated.
+ branch.gen_fn.set_shape(BranchShape::Default);
+ } else {
+ // The branch target is still adjacent, so the branch must remain
+ // a fallthrough so we don't overwrite the target with a jump.
+ //
+ // This can happen if we're unable to generate a stub and the
+ // target block also exits on entry (block_start == block_entry_exit).
+ }
}
// Rewrite the branch with the new jump target address
@@ -3285,6 +4263,11 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
if target_next && branch.end_addr > block.end_addr {
panic!("yjit invalidate rewrote branch past end of invalidated block: {:?} (code_size: {})", branch, block.code_size());
}
+ let is_last_incoming_branch = i == incoming_branches.len() - 1;
+ if target_next && branch.end_addr.get() > block_entry_exit && !is_last_incoming_branch {
+ // We might still need to jump to this exit if we run out of memory when rewriting another incoming branch.
+ panic!("yjit invalidate rewrote branch over exit of invalidated block: {:?}", branch);
+ }
if !target_next && branch.code_size() > old_branch_size {
panic!(
"invalidated branch grew in size (start_addr: {:?}, old_size: {}, new_size: {})",
@@ -3323,11 +4306,9 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
incr_counter!(invalidation_count);
}
-// We cannot deallocate blocks immediately after invalidation since there
-// could be stubs waiting to access branch pointers. Return stubs can do
-// this since patching the code for setting up return addresses does not
-// affect old return addresses that are already set up to use potentially
-// invalidated branch pointers. Example:
+// We cannot deallocate blocks immediately after invalidation since patching the code for setting
+// up return addresses does not affect outstanding return addresses that are on the stack and will use
+// invalidated branch pointers when hit. Example:
// def foo(n)
// if n == 2
// # 1.times.each to create a cfunc frame to preserve the JIT frame
@@ -3335,13 +4316,16 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
// return 1.times.each { Object.define_method(:foo) {} }
// end
//
-// foo(n + 1)
+// foo(n + 1) # The block for this call houses the return branch stub
// end
// p foo(1)
pub fn delayed_deallocation(blockref: BlockRef) {
block_assumptions_free(blockref);
- let payload = get_iseq_payload(unsafe { blockref.as_ref() }.iseq.get()).unwrap();
+ let block = unsafe { blockref.as_ref() };
+ // Set null ISEQ on the block to signal that it's dead.
+ let iseq = block.iseq.replace(ptr::null());
+ let payload = get_iseq_payload(iseq).unwrap();
payload.dead_blocks.push(blockref);
}
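// Standalone sketch (illustrative, not YJIT's Block) of the tombstone step
// above: the ISEQ pointer lives in a Cell, and replace() both reads the old
// value for the payload lookup and leaves a null pointer behind so later
// readers can tell the block is dead.
use std::cell::Cell;
use std::ptr;

struct Block {
    iseq: Cell<*const u8>,
}

fn main() {
    let data = 42u8;
    let block = Block { iseq: Cell::new(&data as *const u8) };

    // Take the live pointer and null out the field in one step.
    let iseq = block.iseq.replace(ptr::null());
    assert!(!iseq.is_null());
    assert!(block.iseq.get().is_null());
}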
@@ -3385,19 +4369,14 @@ mod tests {
// and all local types in 32 bits
assert_eq!(mem::size_of::<Type>(), 1);
assert!(Type::BlockParamProxy as usize <= 0b1111);
- assert!(MAX_LOCAL_TYPES * 4 <= 32);
- }
-
- #[test]
- fn tempmapping_size() {
- assert_eq!(mem::size_of::<TempMapping>(), 1);
+ assert!(MAX_CTX_LOCALS * 4 <= 32);
}
#[test]
fn local_types() {
let mut ctx = Context::default();
- for i in 0..MAX_LOCAL_TYPES {
+ for i in 0..MAX_CTX_LOCALS {
ctx.set_local_type(i, Type::Fixnum);
assert_eq!(ctx.get_local_type(i), Type::Fixnum);
ctx.set_local_type(i, Type::BlockParamProxy);
@@ -3419,26 +4398,6 @@ mod tests {
}
#[test]
- fn tempmapping() {
- let t = TempMapping::map_to_stack(Type::Unknown);
- assert_eq!(t.get_kind(), MapToStack);
- assert_eq!(t.get_type(), Type::Unknown);
-
- let t = TempMapping::map_to_stack(Type::TString);
- assert_eq!(t.get_kind(), MapToStack);
- assert_eq!(t.get_type(), Type::TString);
-
- let t = TempMapping::map_to_local(7);
- assert_eq!(t.get_kind(), MapToLocal);
- assert_eq!(t.get_local_idx(), 7);
- }
-
- #[test]
- fn context_size() {
- assert_eq!(mem::size_of::<Context>(), 15);
- }
-
- #[test]
fn types() {
// Valid src => dst
assert_eq!(Type::Unknown.diff(Type::Unknown), TypeDiff::Compatible(0));
@@ -3454,41 +4413,30 @@ mod tests {
}
#[test]
- fn reg_temps() {
- let mut reg_temps = RegTemps(0);
+ fn reg_mapping() {
+ let mut reg_mapping = RegMapping([None, None, None, None, None]);
// 0 means every slot is not spilled
- for stack_idx in 0..MAX_REG_TEMPS {
- assert_eq!(reg_temps.get(stack_idx), false);
+ for stack_idx in 0..MAX_CTX_TEMPS as u8 {
+ assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(stack_idx)), None);
}
- // Set 0, 2, 7 (RegTemps: 10100001)
- reg_temps.set(0, true);
- reg_temps.set(2, true);
- reg_temps.set(3, true);
- reg_temps.set(3, false);
- reg_temps.set(7, true);
+ // Set 0, 2, 6 (RegMapping: [Some(0), Some(6), Some(2), None, None])
+ reg_mapping.alloc_reg(RegOpnd::Stack(0));
+ reg_mapping.alloc_reg(RegOpnd::Stack(2));
+ reg_mapping.alloc_reg(RegOpnd::Stack(3));
+ reg_mapping.dealloc_reg(RegOpnd::Stack(3));
+ reg_mapping.alloc_reg(RegOpnd::Stack(6));
// Get 0..8
- assert_eq!(reg_temps.get(0), true);
- assert_eq!(reg_temps.get(1), false);
- assert_eq!(reg_temps.get(2), true);
- assert_eq!(reg_temps.get(3), false);
- assert_eq!(reg_temps.get(4), false);
- assert_eq!(reg_temps.get(5), false);
- assert_eq!(reg_temps.get(6), false);
- assert_eq!(reg_temps.get(7), true);
-
- // Test conflicts
- assert_eq!(5, get_option!(num_temp_regs));
- assert_eq!(reg_temps.conflicts_with(0), false); // already set, but no conflict
- assert_eq!(reg_temps.conflicts_with(1), false);
- assert_eq!(reg_temps.conflicts_with(2), true); // already set, and conflicts with 7
- assert_eq!(reg_temps.conflicts_with(3), false);
- assert_eq!(reg_temps.conflicts_with(4), false);
- assert_eq!(reg_temps.conflicts_with(5), true); // not set, and will conflict with 0
- assert_eq!(reg_temps.conflicts_with(6), false);
- assert_eq!(reg_temps.conflicts_with(7), true); // already set, and conflicts with 2
+ assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(0)), Some(0));
+ assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(1)), None);
+ assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(2)), Some(2));
+ assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(3)), None);
+ assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(4)), None);
+ assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(5)), None);
+ assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(6)), Some(1));
+ assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(7)), None);
}
#[test]
@@ -3497,7 +4445,7 @@ mod tests {
assert_eq!(Context::default().diff(&Context::default()), TypeDiff::Compatible(0));
// Try pushing an operand and getting its type
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new(0);
asm.stack_push(Type::Fixnum);
let top_type = asm.ctx.get_opnd_type(StackOpnd(0));
assert!(top_type == Type::Fixnum);
@@ -3506,8 +4454,42 @@ mod tests {
}
#[test]
+ fn context_upgrade_local() {
+ let mut asm = Assembler::new(0);
+ asm.stack_push_local(0);
+ asm.ctx.upgrade_opnd_type(StackOpnd(0), Type::Nil);
+ assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0)));
+ }
+
+ #[test]
+ fn context_chain_depth() {
+ let mut ctx = Context::default();
+ assert_eq!(ctx.get_chain_depth(), 0);
+ assert_eq!(ctx.is_return_landing(), false);
+ assert_eq!(ctx.is_deferred(), false);
+
+ for _ in 0..5 {
+ ctx.increment_chain_depth();
+ }
+ assert_eq!(ctx.get_chain_depth(), 5);
+
+ ctx.set_as_return_landing();
+ assert_eq!(ctx.is_return_landing(), true);
+
+ ctx.clear_return_landing();
+ assert_eq!(ctx.is_return_landing(), false);
+
+ ctx.mark_as_deferred();
+ assert_eq!(ctx.is_deferred(), true);
+
+ ctx.reset_chain_depth_and_defer();
+ assert_eq!(ctx.get_chain_depth(), 0);
+ assert_eq!(ctx.is_deferred(), false);
+ }
+
+ #[test]
fn shift_stack_for_send() {
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new(0);
// Push values to simulate send(:name, arg) with 6 items already on-stack
for _ in 0..6 {
@@ -3532,8 +4514,9 @@ mod tests {
idx: 0,
};
let cb = CodeBlock::new_dummy(1024);
+ let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(1024));
let dumm_addr = cb.get_write_ptr();
- let block = JITState::new(blockid, Context::default(), dumm_addr, ptr::null())
+ let block = JITState::new(blockid, Context::default(), dumm_addr, ptr::null(), &mut ocb, true)
.into_block(0, dumm_addr, dumm_addr, vec![]);
let _dropper = BlockDropper(block);
@@ -3541,14 +4524,14 @@ mod tests {
// we're always working with &Branch (a shared reference to a Branch).
let branch: &Branch = &Branch {
gen_fn: BranchGenFn::JZToTarget0,
- block,
+ block: Cell::new(block),
start_addr: dumm_addr,
end_addr: Cell::new(dumm_addr),
targets: [Cell::new(None), Cell::new(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub {
iseq: Cell::new(ptr::null()),
iseq_idx: 0,
address: None,
- ctx: Context::default(),
+ ctx: 0,
})))))]
};
// For easier soundness reasoning, make sure the reference returned does not out live the
@@ -3581,7 +4564,7 @@ mod tests {
iseq: Cell::new(ptr::null()),
iseq_idx: 0,
address: None,
- ctx: Context::default(),
+ ctx: 0,
})))));
// Invalid ISeq; we never dereference it.
let secret_iseq = NonNull::<rb_iseq_t>::dangling().as_ptr();
diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs
index ac0bdf6885..d34b049a45 100644
--- a/yjit/src/cruby.rs
+++ b/yjit/src/cruby.rs
@@ -83,7 +83,8 @@
#![allow(non_upper_case_globals)]
use std::convert::From;
-use std::ffi::CString;
+use std::ffi::{CString, CStr};
+use std::fmt::{Debug, Formatter};
use std::os::raw::{c_char, c_int, c_uint};
use std::panic::{catch_unwind, UnwindSafe};
@@ -107,13 +108,25 @@ pub use autogened::*;
// TODO: For #defines that affect memory layout, we need to check for them
// on build and fail if they're wrong. e.g. USE_FLONUM *must* be true.
-// These are functions we expose from vm_insnhelper.c, not in any header.
+// These are functions we expose from C files, not in any header.
// Parsing it would result in a lot of duplicate definitions.
// Use bindgen for functions that are defined in headers or in yjit.c.
#[cfg_attr(test, allow(unused))] // We don't link against C code when testing
extern "C" {
+ pub fn rb_check_overloaded_cme(
+ me: *const rb_callable_method_entry_t,
+ ci: *const rb_callinfo,
+ ) -> *const rb_callable_method_entry_t;
+
+ // Floats within range will be encoded without creating objects in the heap.
+ // (Range is 0x3000000000000001 to 0x4fffffffffffffff, i.e. 1.7272337110188893E-77 to 2.3158417847463237E+77.)
+ pub fn rb_float_new(d: f64) -> VALUE;
+
+ pub fn rb_hash_empty_p(hash: VALUE) -> VALUE;
+ pub fn rb_str_setbyte(str: VALUE, index: VALUE, value: VALUE) -> VALUE;
pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE;
pub fn rb_vm_concat_array(ary1: VALUE, ary2st: VALUE) -> VALUE;
+ pub fn rb_vm_concat_to_array(ary1: VALUE, ary2st: VALUE) -> VALUE;
pub fn rb_vm_defined(
ec: EcPtr,
reg_cfp: CfpPtr,
@@ -135,6 +148,8 @@ extern "C" {
ic: ICVARC,
) -> VALUE;
pub fn rb_vm_ic_hit_p(ic: IC, reg_ep: *const VALUE) -> bool;
+ pub fn rb_vm_stack_canary() -> VALUE;
+ pub fn rb_vm_push_cfunc_frame(cme: *const rb_callable_method_entry_t, recv_idx: c_int);
}
// Renames
@@ -161,11 +176,11 @@ pub use rb_iseq_encoded_size as get_iseq_encoded_size;
pub use rb_get_iseq_body_local_iseq as get_iseq_body_local_iseq;
pub use rb_get_iseq_body_iseq_encoded as get_iseq_body_iseq_encoded;
pub use rb_get_iseq_body_stack_max as get_iseq_body_stack_max;
+pub use rb_get_iseq_body_type as get_iseq_body_type;
pub use rb_get_iseq_flags_has_lead as get_iseq_flags_has_lead;
pub use rb_get_iseq_flags_has_opt as get_iseq_flags_has_opt;
pub use rb_get_iseq_flags_has_kw as get_iseq_flags_has_kw;
pub use rb_get_iseq_flags_has_rest as get_iseq_flags_has_rest;
-pub use rb_get_iseq_flags_ruby2_keywords as get_iseq_flags_ruby2_keywords;
pub use rb_get_iseq_flags_has_post as get_iseq_flags_has_post;
pub use rb_get_iseq_flags_has_kwrest as get_iseq_flags_has_kwrest;
pub use rb_get_iseq_flags_has_block as get_iseq_flags_has_block;
@@ -182,8 +197,8 @@ pub use rb_get_cikw_keywords_idx as get_cikw_keywords_idx;
pub use rb_get_call_data_ci as get_call_data_ci;
pub use rb_yarv_str_eql_internal as rb_str_eql_internal;
pub use rb_yarv_ary_entry_internal as rb_ary_entry_internal;
-pub use rb_yjit_fix_div_fix as rb_fix_div_fix;
-pub use rb_yjit_fix_mod_fix as rb_fix_mod_fix;
+pub use rb_jit_fix_div_fix as rb_fix_div_fix;
+pub use rb_jit_fix_mod_fix as rb_fix_mod_fix;
pub use rb_FL_TEST as FL_TEST;
pub use rb_FL_TEST_RAW as FL_TEST_RAW;
pub use rb_RB_TYPE_P as RB_TYPE_P;
@@ -199,8 +214,6 @@ pub use rb_RCLASS_ORIGIN as RCLASS_ORIGIN;
/// Helper so we can get a Rust string for insn_name()
pub fn insn_name(opcode: usize) -> String {
- use std::ffi::CStr;
-
unsafe {
// Look up Ruby's NULL-terminated insn name string
let op_name = raw_insn_name(VALUE(opcode));
@@ -255,6 +268,18 @@ pub fn iseq_opcode_at_idx(iseq: IseqPtr, insn_idx: u32) -> u32 {
unsafe { rb_iseq_opcode_at_pc(iseq, pc) as u32 }
}
+/// Return a poison value to be set above the stack top to verify leafness.
+#[cfg(not(test))]
+pub fn vm_stack_canary() -> u64 {
+ unsafe { rb_vm_stack_canary() }.as_u64()
+}
+
+/// Avoid linking the C function in `cargo test`
+#[cfg(test)]
+pub fn vm_stack_canary() -> u64 {
+ 0
+}
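// Standalone sketch of the cfg-gating above: the normal build forwards to C,
// while `cargo test` (which does not link the C runtime) gets a dummy value.
// A constant stands in for the extern "C" call so this sketch links on its own.
#[cfg(not(test))]
pub fn canary() -> u64 {
    // The real code would call an extern "C" function here.
    0xDEAD_BEEF
}

/// Avoid linking any C function under `cargo test`.
#[cfg(test)]
pub fn canary() -> u64 {
    0
}

fn main() {
    assert_eq!(canary(), 0xDEAD_BEEF);
}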
+
/// Opaque execution-context type from vm_core.h
#[repr(C)]
pub struct rb_execution_context_struct {
@@ -289,13 +314,6 @@ pub struct rb_callcache {
_marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
}
-/// Opaque call-info type from vm_callinfo.h
-#[repr(C)]
-pub struct rb_callinfo_kwarg {
- _data: [u8; 0],
- _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
-}
-
/// Opaque control_frame (CFP) struct from vm_core.h
#[repr(C)]
pub struct rb_control_frame_struct {
@@ -343,6 +361,11 @@ impl VALUE {
!self.special_const_p()
}
+ /// Shareability between ractors. `RB_OBJ_SHAREABLE_P()`.
+ pub fn shareable_p(self) -> bool {
+ (self.builtin_flags() & RUBY_FL_SHAREABLE as usize) != 0
+ }
+
/// Return true if the value is a Ruby Fixnum (immediate-size integer)
pub fn fixnum_p(self) -> bool {
let VALUE(cval) = self;
@@ -379,6 +402,11 @@ impl VALUE {
}
}
+ /// Returns true if the value is T_HASH
+ pub fn hash_p(self) -> bool {
+ !self.special_const_p() && self.builtin_type() == RUBY_T_HASH
+ }
+
/// Returns true or false depending on whether the value is nil
pub fn nil_p(self) -> bool {
self == Qnil
@@ -417,28 +445,16 @@ impl VALUE {
}
pub fn shape_too_complex(self) -> bool {
- unsafe { rb_shape_obj_too_complex(self) }
+ unsafe { rb_yjit_shape_obj_too_complex_p(self) }
}
pub fn shape_id_of(self) -> u32 {
- unsafe { rb_shape_get_shape_id(self) }
- }
-
- pub fn shape_of(self) -> *mut rb_shape {
- unsafe {
- let shape = rb_shape_get_shape_by_id(self.shape_id_of());
-
- if shape.is_null() {
- panic!("Shape should not be null");
- } else {
- shape
- }
- }
+ unsafe { rb_obj_shape_id(self) }
}
pub fn embedded_p(self) -> bool {
unsafe {
- FL_TEST_RAW(self, VALUE(ROBJECT_EMBED as usize)) != VALUE(0)
+ FL_TEST_RAW(self, VALUE(ROBJECT_HEAP as usize)) == VALUE(0)
}
}
@@ -518,9 +534,7 @@ impl VALUE {
ptr
}
-}
-impl VALUE {
pub fn fixnum_from_usize(item: usize) -> Self {
assert!(item <= (RUBY_FIXNUM_MAX as usize)); // An unsigned will always be greater than RUBY_FIXNUM_MIN
let k: usize = item.wrapping_add(item.wrapping_add(1));
@@ -542,6 +556,18 @@ impl From<*const rb_callable_method_entry_t> for VALUE {
}
}
+impl From<&str> for VALUE {
+ fn from(value: &str) -> Self {
+ rust_str_to_ruby(value)
+ }
+}
+
+impl From<String> for VALUE {
+ fn from(value: String) -> Self {
+ rust_str_to_ruby(&value)
+ }
+}
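// Standalone sketch of the conversion impls above, with a plain newtype
// standing in for VALUE: once From<&str> and From<String> exist, call sites
// can write `.into()` instead of spelling out the constructor.
#[derive(Debug, PartialEq)]
struct Value(String);

impl From<&str> for Value {
    fn from(s: &str) -> Self {
        Value(s.to_string())
    }
}

impl From<String> for Value {
    fn from(s: String) -> Self {
        Value(s)
    }
}

fn main() {
    let a: Value = "hello".into();
    let b: Value = String::from("hello").into();
    assert_eq!(a, b);
}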
+
impl From<VALUE> for u64 {
fn from(value: VALUE) -> Self {
let VALUE(uimm) = value;
@@ -573,23 +599,27 @@ impl From<VALUE> for u16 {
}
/// Produce a Ruby string from a Rust string slice
-#[cfg(feature = "disasm")]
pub fn rust_str_to_ruby(str: &str) -> VALUE {
unsafe { rb_utf8_str_new(str.as_ptr() as *const _, str.len() as i64) }
}
/// Produce a Ruby symbol from a Rust string slice
pub fn rust_str_to_sym(str: &str) -> VALUE {
+ let id = rust_str_to_id(str);
+ unsafe { rb_id2sym(id) }
+}
+
+/// Produce an ID from a Rust string slice
+pub fn rust_str_to_id(str: &str) -> ID {
let c_str = CString::new(str).unwrap();
let c_ptr: *const c_char = c_str.as_ptr();
- unsafe { rb_id2sym(rb_intern(c_ptr)) }
+ unsafe { rb_intern(c_ptr) }
}
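// Standalone sketch of the CString handling above: the CString must outlive
// the raw pointer handed to the foreign call, so it is bound to a local before
// as_ptr() is taken. use_c_string is a stand-in for the real FFI function.
use std::ffi::{CStr, CString};
use std::os::raw::c_char;

fn use_c_string(ptr: *const c_char) -> usize {
    unsafe { CStr::from_ptr(ptr) }.to_bytes().len()
}

fn main() {
    let c_str = CString::new("include?").unwrap(); // keeps the buffer alive
    let c_ptr: *const c_char = c_str.as_ptr();
    assert_eq!(use_c_string(c_ptr), 8);
}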
/// Produce an owned Rust String from a C char pointer
pub fn cstr_to_rust_string(c_char_ptr: *const c_char) -> Option<String> {
assert!(c_char_ptr != std::ptr::null());
- use std::ffi::CStr;
let c_str: &CStr = unsafe { CStr::from_ptr(c_char_ptr) };
match c_str.to_str() {
@@ -601,17 +631,26 @@ pub fn cstr_to_rust_string(c_char_ptr: *const c_char) -> Option<String> {
/// A location in Rust code for integrating with debugging facilities defined in C.
/// Use the [src_loc!] macro to create an instance.
pub struct SourceLocation {
- pub file: CString,
+ pub file: &'static CStr,
pub line: c_int,
}
+impl Debug for SourceLocation {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ f.write_fmt(format_args!("{}:{}", self.file.to_string_lossy(), self.line))
+ }
+}
+
/// Make a [SourceLocation] at the current spot.
macro_rules! src_loc {
() => {
- // NOTE(alan): `CString::new` allocates so we might want to limit this to debug builds.
- $crate::cruby::SourceLocation {
- file: std::ffi::CString::new(file!()).unwrap(), // ASCII source file paths
- line: line!().try_into().unwrap(), // not that many lines
+ {
+ // Nul-terminated string with static lifetime, make a CStr out of it safely.
+ let file: &'static str = concat!(file!(), '\0');
+ $crate::cruby::SourceLocation {
+ file: unsafe { std::ffi::CStr::from_ptr(file.as_ptr().cast()) },
+ line: line!().try_into().unwrap(),
+ }
}
};
}
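// Standalone sketch of the allocation-free trick above: concat! appends the
// NUL byte at compile time, so the resulting &'static str can be viewed as a
// &'static CStr without ever calling CString::new. The safe
// CStr::from_bytes_with_nul is used here in place of the unsafe from_ptr cast.
use std::ffi::CStr;

macro_rules! here {
    () => {{
        let file: &'static str = concat!(file!(), '\0');
        let file: &'static CStr = CStr::from_bytes_with_nul(file.as_bytes()).unwrap();
        (file, line!())
    }};
}

fn main() {
    let (file, line) = here!();
    println!("{}:{}", file.to_string_lossy(), line);
}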
@@ -642,28 +681,27 @@ where
let line = loc.line;
let mut recursive_lock_level: c_uint = 0;
- unsafe { rb_yjit_vm_lock_then_barrier(&mut recursive_lock_level, file, line) };
+ unsafe { rb_jit_vm_lock_then_barrier(&mut recursive_lock_level, file, line) };
let ret = match catch_unwind(func) {
Ok(result) => result,
Err(_) => {
// Theoretically we can recover from some of these panics,
// but it's too late if the unwind reaches here.
- use std::{process, str};
let _ = catch_unwind(|| {
// IO functions can panic too.
eprintln!(
"YJIT panicked while holding VM lock acquired at {}:{}. Aborting...",
- str::from_utf8(loc.file.as_bytes()).unwrap_or("<not utf8>"),
+ loc.file.to_string_lossy(),
line,
);
});
- process::abort();
+ std::process::abort();
}
};
- unsafe { rb_yjit_vm_unlock(&mut recursive_lock_level, file, line) };
+ unsafe { rb_jit_vm_unlock(&mut recursive_lock_level, file, line) };
ret
}
@@ -693,8 +731,10 @@ mod manual_defs {
pub const RUBY_FIXNUM_MAX: isize = RUBY_LONG_MAX / 2;
// From vm_callinfo.h - uses calculation that seems to confuse bindgen
+ pub const VM_CALL_ARGS_SIMPLE: u32 = 1 << VM_CALL_ARGS_SIMPLE_bit;
pub const VM_CALL_ARGS_SPLAT: u32 = 1 << VM_CALL_ARGS_SPLAT_bit;
pub const VM_CALL_ARGS_BLOCKARG: u32 = 1 << VM_CALL_ARGS_BLOCKARG_bit;
+ pub const VM_CALL_FORWARDING: u32 = 1 << VM_CALL_FORWARDING_bit;
pub const VM_CALL_FCALL: u32 = 1 << VM_CALL_FCALL_bit;
pub const VM_CALL_KWARG: u32 = 1 << VM_CALL_KWARG_bit;
pub const VM_CALL_KW_SPLAT: u32 = 1 << VM_CALL_KW_SPLAT_bit;
@@ -719,6 +759,9 @@ mod manual_defs {
pub const RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR: i32 = 24; // struct RStruct, subfield "as.heap.ptr"
pub const RUBY_OFFSET_RSTRUCT_AS_ARY: i32 = 16; // struct RStruct, subfield "as.ary"
+ pub const RUBY_OFFSET_RSTRING_AS_HEAP_PTR: i32 = 24; // struct RString, subfield "as.heap.ptr"
+ pub const RUBY_OFFSET_RSTRING_AS_ARY: i32 = 24; // struct RString, subfield "as.embed.ary"
+
// Constants from rb_control_frame_t vm_core.h
pub const RUBY_OFFSET_CFP_PC: i32 = 0;
pub const RUBY_OFFSET_CFP_SP: i32 = 8;
@@ -729,12 +772,6 @@ mod manual_defs {
pub const RUBY_OFFSET_CFP_JIT_RETURN: i32 = 48;
pub const RUBY_SIZEOF_CONTROL_FRAME: usize = 56;
- // Constants from rb_execution_context_t vm_core.h
- pub const RUBY_OFFSET_EC_CFP: i32 = 16;
- pub const RUBY_OFFSET_EC_INTERRUPT_FLAG: i32 = 32; // rb_atomic_t (u32)
- pub const RUBY_OFFSET_EC_INTERRUPT_MASK: i32 = 36; // rb_atomic_t (u32)
- pub const RUBY_OFFSET_EC_THREAD_PTR: i32 = 48;
-
// Constants from rb_thread_t in vm_core.h
pub const RUBY_OFFSET_THREAD_SELF: i32 = 16;
@@ -745,17 +782,16 @@ mod manual_defs {
pub use manual_defs::*;
/// Interned ID values for Ruby symbols and method names.
-/// See [crate::cruby::ID] and usages outside of YJIT.
+/// See [type@crate::cruby::ID] and usages outside of YJIT.
pub(crate) mod ids {
use std::sync::atomic::AtomicU64;
/// Globals to cache IDs on boot. Atomic to use with relaxed ordering
- /// so reads can happen without `unsafe`. Initialization is done
- /// single-threaded and release-acquire on [crate::yjit::YJIT_ENABLED]
- /// makes sure we read the cached values after initialization is done.
+ /// so reads can happen without `unsafe`. Synchronization is done through
+ /// the VM lock.
macro_rules! def_ids {
($(name: $ident:ident content: $str:literal)*) => {
$(
- #[doc = concat!("[crate::cruby::ID] for `", stringify!($str), "`")]
+ #[doc = concat!("[type@crate::cruby::ID] for `", stringify!($str), "`")]
pub static $ident: AtomicU64 = AtomicU64::new(0);
)*
@@ -777,11 +813,12 @@ pub(crate) mod ids {
def_ids! {
name: NULL content: b""
- name: min content: b"min"
- name: max content: b"max"
- name: hash content: b"hash"
name: respond_to_missing content: b"respond_to_missing?"
+ name: method_missing content: b"method_missing"
name: to_ary content: b"to_ary"
+ name: to_s content: b"to_s"
+ name: eq content: b"=="
+ name: include_p content: b"include?"
}
}
diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs
index 462c9c5748..56994388a3 100644
--- a/yjit/src/cruby_bindings.inc.rs
+++ b/yjit/src/cruby_bindings.inc.rs
@@ -1,4 +1,4 @@
-/* automatically generated by rust-bindgen 0.63.0 */
+/* automatically generated by rust-bindgen 0.70.1 */
#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
@@ -81,6 +81,36 @@ where
}
}
#[repr(C)]
+#[derive(Default)]
+pub struct __IncompleteArrayField<T>(::std::marker::PhantomData<T>, [T; 0]);
+impl<T> __IncompleteArrayField<T> {
+ #[inline]
+ pub const fn new() -> Self {
+ __IncompleteArrayField(::std::marker::PhantomData, [])
+ }
+ #[inline]
+ pub fn as_ptr(&self) -> *const T {
+ self as *const _ as *const T
+ }
+ #[inline]
+ pub fn as_mut_ptr(&mut self) -> *mut T {
+ self as *mut _ as *mut T
+ }
+ #[inline]
+ pub unsafe fn as_slice(&self, len: usize) -> &[T] {
+ ::std::slice::from_raw_parts(self.as_ptr(), len)
+ }
+ #[inline]
+ pub unsafe fn as_mut_slice(&mut self, len: usize) -> &mut [T] {
+ ::std::slice::from_raw_parts_mut(self.as_mut_ptr(), len)
+ }
+}
+impl<T> ::std::fmt::Debug for __IncompleteArrayField<T> {
+ fn fmt(&self, fmt: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
+ fmt.write_str("__IncompleteArrayField")
+ }
+}
+#[repr(C)]
pub struct __BindgenUnionField<T>(::std::marker::PhantomData<T>);
impl<T> __BindgenUnionField<T> {
#[inline]
@@ -105,7 +135,7 @@ impl<T> ::std::default::Default for __BindgenUnionField<T> {
impl<T> ::std::clone::Clone for __BindgenUnionField<T> {
#[inline]
fn clone(&self) -> Self {
- Self::new()
+ *self
}
}
impl<T> ::std::marker::Copy for __BindgenUnionField<T> {}
@@ -135,13 +165,13 @@ pub const NIL_REDEFINED_OP_FLAG: u32 = 512;
pub const TRUE_REDEFINED_OP_FLAG: u32 = 1024;
pub const FALSE_REDEFINED_OP_FLAG: u32 = 2048;
pub const PROC_REDEFINED_OP_FLAG: u32 = 4096;
+pub const VM_KW_SPECIFIED_BITS_MAX: u32 = 31;
pub const VM_ENV_DATA_SIZE: u32 = 3;
pub const VM_ENV_DATA_INDEX_ME_CREF: i32 = -2;
pub const VM_ENV_DATA_INDEX_SPECVAL: i32 = -1;
pub const VM_ENV_DATA_INDEX_FLAGS: u32 = 0;
pub const VM_BLOCK_HANDLER_NONE: u32 = 0;
pub const SHAPE_ID_NUM_BITS: u32 = 32;
-pub const OBJ_TOO_COMPLEX_SHAPE_ID: u32 = 11;
pub type ID = ::std::os::raw::c_ulong;
pub type rb_alloc_func_t = ::std::option::Option<unsafe extern "C" fn(klass: VALUE) -> VALUE>;
pub const RUBY_Qfalse: ruby_special_consts = 0;
@@ -193,13 +223,12 @@ pub const RUBY_FL_USHIFT: ruby_fl_ushift = 12;
pub type ruby_fl_ushift = u32;
pub const RUBY_FL_WB_PROTECTED: ruby_fl_type = 32;
pub const RUBY_FL_PROMOTED: ruby_fl_type = 32;
-pub const RUBY_FL_UNUSED6: ruby_fl_type = 64;
+pub const RUBY_FL_USERPRIV0: ruby_fl_type = 64;
pub const RUBY_FL_FINALIZE: ruby_fl_type = 128;
-pub const RUBY_FL_TAINT: ruby_fl_type = 0;
+pub const RUBY_FL_EXIVAR: ruby_fl_type = 0;
pub const RUBY_FL_SHAREABLE: ruby_fl_type = 256;
-pub const RUBY_FL_UNTRUSTED: ruby_fl_type = 0;
-pub const RUBY_FL_SEEN_OBJ_ID: ruby_fl_type = 512;
-pub const RUBY_FL_EXIVAR: ruby_fl_type = 1024;
+pub const RUBY_FL_WEAK_REFERENCE: ruby_fl_type = 512;
+pub const RUBY_FL_UNUSED10: ruby_fl_type = 1024;
pub const RUBY_FL_FREEZE: ruby_fl_type = 2048;
pub const RUBY_FL_USER0: ruby_fl_type = 4096;
pub const RUBY_FL_USER1: ruby_fl_type = 8192;
@@ -221,8 +250,8 @@ pub const RUBY_FL_USER16: ruby_fl_type = 268435456;
pub const RUBY_FL_USER17: ruby_fl_type = 536870912;
pub const RUBY_FL_USER18: ruby_fl_type = 1073741824;
pub const RUBY_FL_USER19: ruby_fl_type = -2147483648;
-pub const RUBY_ELTS_SHARED: ruby_fl_type = 16384;
-pub const RUBY_FL_SINGLETON: ruby_fl_type = 4096;
+pub const RUBY_ELTS_SHARED: ruby_fl_type = 4096;
+pub const RUBY_FL_SINGLETON: ruby_fl_type = 8192;
pub type ruby_fl_type = i32;
pub const RSTRING_NOEMBED: ruby_rstring_flags = 8192;
pub const RSTRING_FSTR: ruby_rstring_flags = 536870912;
@@ -247,9 +276,9 @@ pub const RARRAY_EMBED_LEN_MASK: ruby_rarray_flags = 4161536;
pub type ruby_rarray_flags = u32;
pub const RARRAY_EMBED_LEN_SHIFT: ruby_rarray_consts = 15;
pub type ruby_rarray_consts = u32;
-pub const RMODULE_IS_REFINEMENT: ruby_rmodule_flags = 32768;
+pub const RMODULE_IS_REFINEMENT: ruby_rmodule_flags = 8192;
pub type ruby_rmodule_flags = u32;
-pub const ROBJECT_EMBED: ruby_robject_flags = 8192;
+pub const ROBJECT_HEAP: ruby_robject_flags = 65536;
pub type ruby_robject_flags = u32;
pub type rb_block_call_func = ::std::option::Option<
unsafe extern "C" fn(
@@ -299,20 +328,23 @@ pub const BOP_NIL_P: ruby_basic_operators = 15;
pub const BOP_SUCC: ruby_basic_operators = 16;
pub const BOP_GT: ruby_basic_operators = 17;
pub const BOP_GE: ruby_basic_operators = 18;
-pub const BOP_NOT: ruby_basic_operators = 19;
-pub const BOP_NEQ: ruby_basic_operators = 20;
-pub const BOP_MATCH: ruby_basic_operators = 21;
-pub const BOP_FREEZE: ruby_basic_operators = 22;
-pub const BOP_UMINUS: ruby_basic_operators = 23;
-pub const BOP_MAX: ruby_basic_operators = 24;
-pub const BOP_MIN: ruby_basic_operators = 25;
-pub const BOP_HASH: ruby_basic_operators = 26;
-pub const BOP_CALL: ruby_basic_operators = 27;
-pub const BOP_AND: ruby_basic_operators = 28;
-pub const BOP_OR: ruby_basic_operators = 29;
-pub const BOP_CMP: ruby_basic_operators = 30;
-pub const BOP_DEFAULT: ruby_basic_operators = 31;
-pub const BOP_LAST_: ruby_basic_operators = 32;
+pub const BOP_GTGT: ruby_basic_operators = 19;
+pub const BOP_NOT: ruby_basic_operators = 20;
+pub const BOP_NEQ: ruby_basic_operators = 21;
+pub const BOP_MATCH: ruby_basic_operators = 22;
+pub const BOP_FREEZE: ruby_basic_operators = 23;
+pub const BOP_UMINUS: ruby_basic_operators = 24;
+pub const BOP_MAX: ruby_basic_operators = 25;
+pub const BOP_MIN: ruby_basic_operators = 26;
+pub const BOP_HASH: ruby_basic_operators = 27;
+pub const BOP_CALL: ruby_basic_operators = 28;
+pub const BOP_AND: ruby_basic_operators = 29;
+pub const BOP_OR: ruby_basic_operators = 30;
+pub const BOP_CMP: ruby_basic_operators = 31;
+pub const BOP_DEFAULT: ruby_basic_operators = 32;
+pub const BOP_PACK: ruby_basic_operators = 33;
+pub const BOP_INCLUDE_P: ruby_basic_operators = 34;
+pub const BOP_LAST_: ruby_basic_operators = 35;
pub type ruby_basic_operators = u32;
pub type rb_serial_t = ::std::os::raw::c_ulonglong;
pub const imemo_env: imemo_type = 0;
@@ -324,11 +356,10 @@ pub const imemo_memo: imemo_type = 5;
pub const imemo_ment: imemo_type = 6;
pub const imemo_iseq: imemo_type = 7;
pub const imemo_tmpbuf: imemo_type = 8;
-pub const imemo_ast: imemo_type = 9;
-pub const imemo_parser_strterm: imemo_type = 10;
-pub const imemo_callinfo: imemo_type = 11;
-pub const imemo_callcache: imemo_type = 12;
-pub const imemo_constcache: imemo_type = 13;
+pub const imemo_callinfo: imemo_type = 10;
+pub const imemo_callcache: imemo_type = 11;
+pub const imemo_constcache: imemo_type = 12;
+pub const imemo_fields: imemo_type = 13;
pub type imemo_type = u32;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
@@ -381,10 +412,11 @@ pub const VM_METHOD_TYPE_OPTIMIZED: rb_method_type_t = 9;
pub const VM_METHOD_TYPE_MISSING: rb_method_type_t = 10;
pub const VM_METHOD_TYPE_REFINED: rb_method_type_t = 11;
pub type rb_method_type_t = u32;
+pub type rb_cfunc_t = ::std::option::Option<unsafe extern "C" fn() -> VALUE>;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct rb_method_cfunc_struct {
- pub func: ::std::option::Option<unsafe extern "C" fn() -> VALUE>,
+ pub func: rb_cfunc_t,
pub invoker: ::std::option::Option<
unsafe extern "C" fn(
recv: VALUE,
@@ -402,11 +434,6 @@ pub const OPTIMIZED_METHOD_TYPE_STRUCT_AREF: method_optimized_type = 3;
pub const OPTIMIZED_METHOD_TYPE_STRUCT_ASET: method_optimized_type = 4;
pub const OPTIMIZED_METHOD_TYPE__MAX: method_optimized_type = 5;
pub type method_optimized_type = u32;
-#[repr(C)]
-#[derive(Debug, Copy, Clone)]
-pub struct rb_id_table {
- _unused: [u8; 0],
-}
pub type rb_num_t = ::std::os::raw::c_ulong;
pub const RUBY_TAG_NONE: ruby_tag_type = 0;
pub const RUBY_TAG_RETURN: ruby_tag_type = 1;
@@ -426,8 +453,6 @@ pub type ruby_vm_throw_flags = u32;
pub struct iseq_inline_constant_cache_entry {
pub flags: VALUE,
pub value: VALUE,
- pub _unused1: VALUE,
- pub _unused2: VALUE,
pub ic_cref: *const rb_cref_t,
}
#[repr(C)]
@@ -439,7 +464,7 @@ pub struct iseq_inline_constant_cache {
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct iseq_inline_iv_cache_entry {
- pub value: usize,
+ pub value: u64,
pub iv_set_name: ID,
}
#[repr(C)]
@@ -447,13 +472,24 @@ pub struct iseq_inline_iv_cache_entry {
pub struct iseq_inline_cvar_cache_entry {
pub entry: *mut rb_cvar_class_tbl_entry,
}
+pub const ISEQ_TYPE_TOP: rb_iseq_type = 0;
+pub const ISEQ_TYPE_METHOD: rb_iseq_type = 1;
+pub const ISEQ_TYPE_BLOCK: rb_iseq_type = 2;
+pub const ISEQ_TYPE_CLASS: rb_iseq_type = 3;
+pub const ISEQ_TYPE_RESCUE: rb_iseq_type = 4;
+pub const ISEQ_TYPE_ENSURE: rb_iseq_type = 5;
+pub const ISEQ_TYPE_EVAL: rb_iseq_type = 6;
+pub const ISEQ_TYPE_MAIN: rb_iseq_type = 7;
+pub const ISEQ_TYPE_PLAIN: rb_iseq_type = 8;
+pub type rb_iseq_type = u32;
pub const BUILTIN_ATTR_LEAF: rb_builtin_attr = 1;
-pub const BUILTIN_ATTR_NO_GC: rb_builtin_attr = 2;
-pub const BUILTIN_ATTR_SINGLE_NOARG_INLINE: rb_builtin_attr = 4;
+pub const BUILTIN_ATTR_SINGLE_NOARG_LEAF: rb_builtin_attr = 2;
+pub const BUILTIN_ATTR_INLINE_BLOCK: rb_builtin_attr = 4;
+pub const BUILTIN_ATTR_C_TRACE: rb_builtin_attr = 8;
pub type rb_builtin_attr = u32;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
-pub struct rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword {
+pub struct rb_iseq_constant_body_rb_iseq_parameters_rb_iseq_param_keyword {
pub num: ::std::os::raw::c_int,
pub required_num: ::std::os::raw::c_int,
pub bits_start: ::std::os::raw::c_int,
@@ -559,6 +595,13 @@ pub const VM_CHECKMATCH_TYPE_WHEN: vm_check_match_type = 1;
pub const VM_CHECKMATCH_TYPE_CASE: vm_check_match_type = 2;
pub const VM_CHECKMATCH_TYPE_RESCUE: vm_check_match_type = 3;
pub type vm_check_match_type = u32;
+pub const VM_OPT_NEWARRAY_SEND_MAX: vm_opt_newarray_send_type = 1;
+pub const VM_OPT_NEWARRAY_SEND_MIN: vm_opt_newarray_send_type = 2;
+pub const VM_OPT_NEWARRAY_SEND_HASH: vm_opt_newarray_send_type = 3;
+pub const VM_OPT_NEWARRAY_SEND_PACK: vm_opt_newarray_send_type = 4;
+pub const VM_OPT_NEWARRAY_SEND_PACK_BUFFER: vm_opt_newarray_send_type = 5;
+pub const VM_OPT_NEWARRAY_SEND_INCLUDE_P: vm_opt_newarray_send_type = 6;
+pub type vm_opt_newarray_send_type = u32;
pub const VM_SPECIAL_OBJECT_VMCORE: vm_special_object_type = 1;
pub const VM_SPECIAL_OBJECT_CBASE: vm_special_object_type = 2;
pub const VM_SPECIAL_OBJECT_CONST_BASE: vm_special_object_type = 3;
@@ -583,36 +626,16 @@ pub const VM_FRAME_FLAG_LAMBDA: vm_frame_env_flags = 256;
pub const VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM: vm_frame_env_flags = 512;
pub const VM_FRAME_FLAG_CFRAME_KW: vm_frame_env_flags = 1024;
pub const VM_FRAME_FLAG_PASSED: vm_frame_env_flags = 2048;
+pub const VM_FRAME_FLAG_BOX_REQUIRE: vm_frame_env_flags = 4096;
pub const VM_ENV_FLAG_LOCAL: vm_frame_env_flags = 2;
pub const VM_ENV_FLAG_ESCAPED: vm_frame_env_flags = 4;
pub const VM_ENV_FLAG_WB_REQUIRED: vm_frame_env_flags = 8;
pub const VM_ENV_FLAG_ISOLATED: vm_frame_env_flags = 16;
pub type vm_frame_env_flags = u32;
-pub type attr_index_t = u32;
+pub type attr_index_t = u16;
pub type shape_id_t = u32;
-pub type redblack_id_t = u32;
-pub type redblack_node_t = redblack_node;
-#[repr(C)]
-#[derive(Debug, Copy, Clone)]
-pub struct rb_shape {
- pub edges: *mut rb_id_table,
- pub edge_name: ID,
- pub next_iv_index: attr_index_t,
- pub capacity: u32,
- pub type_: u8,
- pub size_pool_index: u8,
- pub parent_id: shape_id_t,
- pub ancestor_index: *mut redblack_node_t,
-}
-pub type rb_shape_t = rb_shape;
-#[repr(C)]
-#[derive(Debug, Copy, Clone)]
-pub struct redblack_node {
- pub key: ID,
- pub value: *mut rb_shape_t,
- pub l: redblack_id_t,
- pub r: redblack_id_t,
-}
+pub const SHAPE_ID_HAS_IVAR_MASK: shape_id_mask = 134742014;
+pub type shape_id_mask = u32;
#[repr(C)]
pub struct rb_cvar_class_tbl_entry {
pub index: u32,
@@ -632,9 +655,17 @@ pub const VM_CALL_SUPER_bit: vm_call_flag_bits = 8;
pub const VM_CALL_ZSUPER_bit: vm_call_flag_bits = 9;
pub const VM_CALL_OPT_SEND_bit: vm_call_flag_bits = 10;
pub const VM_CALL_KW_SPLAT_MUT_bit: vm_call_flag_bits = 11;
-pub const VM_CALL__END: vm_call_flag_bits = 12;
+pub const VM_CALL_ARGS_SPLAT_MUT_bit: vm_call_flag_bits = 12;
+pub const VM_CALL_FORWARDING_bit: vm_call_flag_bits = 13;
+pub const VM_CALL__END: vm_call_flag_bits = 14;
pub type vm_call_flag_bits = u32;
#[repr(C)]
+pub struct rb_callinfo_kwarg {
+ pub keyword_len: ::std::os::raw::c_int,
+ pub references: ::std::os::raw::c_int,
+ pub keywords: __IncompleteArrayField<VALUE>,
+}
+#[repr(C)]
pub struct rb_callinfo {
pub flags: VALUE,
pub kwarg: *const rb_callinfo_kwarg,
@@ -648,6 +679,8 @@ pub struct rb_call_data {
pub ci: *const rb_callinfo,
pub cc: *const rb_callcache,
}
+pub const RSTRING_CHILLED: ruby_rstring_private_flags = 49152;
+pub type ruby_rstring_private_flags = u32;
pub const RHASH_PASS_AS_KEYWORDS: ruby_rhash_flags = 8192;
pub const RHASH_PROC_DEFAULT: ruby_rhash_flags = 16384;
pub const RHASH_ST_TABLE_FLAG: ruby_rhash_flags = 32768;
@@ -688,214 +721,288 @@ pub const YARVINSN_putself: ruby_vminsn_type = 18;
pub const YARVINSN_putobject: ruby_vminsn_type = 19;
pub const YARVINSN_putspecialobject: ruby_vminsn_type = 20;
pub const YARVINSN_putstring: ruby_vminsn_type = 21;
-pub const YARVINSN_concatstrings: ruby_vminsn_type = 22;
-pub const YARVINSN_anytostring: ruby_vminsn_type = 23;
-pub const YARVINSN_toregexp: ruby_vminsn_type = 24;
-pub const YARVINSN_intern: ruby_vminsn_type = 25;
-pub const YARVINSN_newarray: ruby_vminsn_type = 26;
-pub const YARVINSN_newarraykwsplat: ruby_vminsn_type = 27;
-pub const YARVINSN_duparray: ruby_vminsn_type = 28;
-pub const YARVINSN_duphash: ruby_vminsn_type = 29;
-pub const YARVINSN_expandarray: ruby_vminsn_type = 30;
-pub const YARVINSN_concatarray: ruby_vminsn_type = 31;
-pub const YARVINSN_splatarray: ruby_vminsn_type = 32;
-pub const YARVINSN_splatkw: ruby_vminsn_type = 33;
-pub const YARVINSN_newhash: ruby_vminsn_type = 34;
-pub const YARVINSN_newrange: ruby_vminsn_type = 35;
-pub const YARVINSN_pop: ruby_vminsn_type = 36;
-pub const YARVINSN_dup: ruby_vminsn_type = 37;
-pub const YARVINSN_dupn: ruby_vminsn_type = 38;
-pub const YARVINSN_swap: ruby_vminsn_type = 39;
-pub const YARVINSN_opt_reverse: ruby_vminsn_type = 40;
-pub const YARVINSN_topn: ruby_vminsn_type = 41;
-pub const YARVINSN_setn: ruby_vminsn_type = 42;
-pub const YARVINSN_adjuststack: ruby_vminsn_type = 43;
-pub const YARVINSN_defined: ruby_vminsn_type = 44;
-pub const YARVINSN_definedivar: ruby_vminsn_type = 45;
-pub const YARVINSN_checkmatch: ruby_vminsn_type = 46;
-pub const YARVINSN_checkkeyword: ruby_vminsn_type = 47;
-pub const YARVINSN_checktype: ruby_vminsn_type = 48;
-pub const YARVINSN_defineclass: ruby_vminsn_type = 49;
-pub const YARVINSN_definemethod: ruby_vminsn_type = 50;
-pub const YARVINSN_definesmethod: ruby_vminsn_type = 51;
-pub const YARVINSN_send: ruby_vminsn_type = 52;
-pub const YARVINSN_opt_send_without_block: ruby_vminsn_type = 53;
-pub const YARVINSN_objtostring: ruby_vminsn_type = 54;
-pub const YARVINSN_opt_str_freeze: ruby_vminsn_type = 55;
-pub const YARVINSN_opt_nil_p: ruby_vminsn_type = 56;
-pub const YARVINSN_opt_str_uminus: ruby_vminsn_type = 57;
-pub const YARVINSN_opt_newarray_send: ruby_vminsn_type = 58;
-pub const YARVINSN_invokesuper: ruby_vminsn_type = 59;
-pub const YARVINSN_invokeblock: ruby_vminsn_type = 60;
-pub const YARVINSN_leave: ruby_vminsn_type = 61;
-pub const YARVINSN_throw: ruby_vminsn_type = 62;
-pub const YARVINSN_jump: ruby_vminsn_type = 63;
-pub const YARVINSN_branchif: ruby_vminsn_type = 64;
-pub const YARVINSN_branchunless: ruby_vminsn_type = 65;
-pub const YARVINSN_branchnil: ruby_vminsn_type = 66;
-pub const YARVINSN_once: ruby_vminsn_type = 67;
-pub const YARVINSN_opt_case_dispatch: ruby_vminsn_type = 68;
-pub const YARVINSN_opt_plus: ruby_vminsn_type = 69;
-pub const YARVINSN_opt_minus: ruby_vminsn_type = 70;
-pub const YARVINSN_opt_mult: ruby_vminsn_type = 71;
-pub const YARVINSN_opt_div: ruby_vminsn_type = 72;
-pub const YARVINSN_opt_mod: ruby_vminsn_type = 73;
-pub const YARVINSN_opt_eq: ruby_vminsn_type = 74;
-pub const YARVINSN_opt_neq: ruby_vminsn_type = 75;
-pub const YARVINSN_opt_lt: ruby_vminsn_type = 76;
-pub const YARVINSN_opt_le: ruby_vminsn_type = 77;
-pub const YARVINSN_opt_gt: ruby_vminsn_type = 78;
-pub const YARVINSN_opt_ge: ruby_vminsn_type = 79;
-pub const YARVINSN_opt_ltlt: ruby_vminsn_type = 80;
-pub const YARVINSN_opt_and: ruby_vminsn_type = 81;
-pub const YARVINSN_opt_or: ruby_vminsn_type = 82;
-pub const YARVINSN_opt_aref: ruby_vminsn_type = 83;
-pub const YARVINSN_opt_aset: ruby_vminsn_type = 84;
-pub const YARVINSN_opt_aset_with: ruby_vminsn_type = 85;
-pub const YARVINSN_opt_aref_with: ruby_vminsn_type = 86;
-pub const YARVINSN_opt_length: ruby_vminsn_type = 87;
-pub const YARVINSN_opt_size: ruby_vminsn_type = 88;
-pub const YARVINSN_opt_empty_p: ruby_vminsn_type = 89;
-pub const YARVINSN_opt_succ: ruby_vminsn_type = 90;
-pub const YARVINSN_opt_not: ruby_vminsn_type = 91;
-pub const YARVINSN_opt_regexpmatch2: ruby_vminsn_type = 92;
-pub const YARVINSN_invokebuiltin: ruby_vminsn_type = 93;
-pub const YARVINSN_opt_invokebuiltin_delegate: ruby_vminsn_type = 94;
-pub const YARVINSN_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 95;
-pub const YARVINSN_getlocal_WC_0: ruby_vminsn_type = 96;
-pub const YARVINSN_getlocal_WC_1: ruby_vminsn_type = 97;
-pub const YARVINSN_setlocal_WC_0: ruby_vminsn_type = 98;
-pub const YARVINSN_setlocal_WC_1: ruby_vminsn_type = 99;
-pub const YARVINSN_putobject_INT2FIX_0_: ruby_vminsn_type = 100;
-pub const YARVINSN_putobject_INT2FIX_1_: ruby_vminsn_type = 101;
-pub const YARVINSN_trace_nop: ruby_vminsn_type = 102;
-pub const YARVINSN_trace_getlocal: ruby_vminsn_type = 103;
-pub const YARVINSN_trace_setlocal: ruby_vminsn_type = 104;
-pub const YARVINSN_trace_getblockparam: ruby_vminsn_type = 105;
-pub const YARVINSN_trace_setblockparam: ruby_vminsn_type = 106;
-pub const YARVINSN_trace_getblockparamproxy: ruby_vminsn_type = 107;
-pub const YARVINSN_trace_getspecial: ruby_vminsn_type = 108;
-pub const YARVINSN_trace_setspecial: ruby_vminsn_type = 109;
-pub const YARVINSN_trace_getinstancevariable: ruby_vminsn_type = 110;
-pub const YARVINSN_trace_setinstancevariable: ruby_vminsn_type = 111;
-pub const YARVINSN_trace_getclassvariable: ruby_vminsn_type = 112;
-pub const YARVINSN_trace_setclassvariable: ruby_vminsn_type = 113;
-pub const YARVINSN_trace_opt_getconstant_path: ruby_vminsn_type = 114;
-pub const YARVINSN_trace_getconstant: ruby_vminsn_type = 115;
-pub const YARVINSN_trace_setconstant: ruby_vminsn_type = 116;
-pub const YARVINSN_trace_getglobal: ruby_vminsn_type = 117;
-pub const YARVINSN_trace_setglobal: ruby_vminsn_type = 118;
-pub const YARVINSN_trace_putnil: ruby_vminsn_type = 119;
-pub const YARVINSN_trace_putself: ruby_vminsn_type = 120;
-pub const YARVINSN_trace_putobject: ruby_vminsn_type = 121;
-pub const YARVINSN_trace_putspecialobject: ruby_vminsn_type = 122;
-pub const YARVINSN_trace_putstring: ruby_vminsn_type = 123;
-pub const YARVINSN_trace_concatstrings: ruby_vminsn_type = 124;
-pub const YARVINSN_trace_anytostring: ruby_vminsn_type = 125;
-pub const YARVINSN_trace_toregexp: ruby_vminsn_type = 126;
-pub const YARVINSN_trace_intern: ruby_vminsn_type = 127;
-pub const YARVINSN_trace_newarray: ruby_vminsn_type = 128;
-pub const YARVINSN_trace_newarraykwsplat: ruby_vminsn_type = 129;
-pub const YARVINSN_trace_duparray: ruby_vminsn_type = 130;
-pub const YARVINSN_trace_duphash: ruby_vminsn_type = 131;
-pub const YARVINSN_trace_expandarray: ruby_vminsn_type = 132;
-pub const YARVINSN_trace_concatarray: ruby_vminsn_type = 133;
-pub const YARVINSN_trace_splatarray: ruby_vminsn_type = 134;
-pub const YARVINSN_trace_splatkw: ruby_vminsn_type = 135;
-pub const YARVINSN_trace_newhash: ruby_vminsn_type = 136;
-pub const YARVINSN_trace_newrange: ruby_vminsn_type = 137;
-pub const YARVINSN_trace_pop: ruby_vminsn_type = 138;
-pub const YARVINSN_trace_dup: ruby_vminsn_type = 139;
-pub const YARVINSN_trace_dupn: ruby_vminsn_type = 140;
-pub const YARVINSN_trace_swap: ruby_vminsn_type = 141;
-pub const YARVINSN_trace_opt_reverse: ruby_vminsn_type = 142;
-pub const YARVINSN_trace_topn: ruby_vminsn_type = 143;
-pub const YARVINSN_trace_setn: ruby_vminsn_type = 144;
-pub const YARVINSN_trace_adjuststack: ruby_vminsn_type = 145;
-pub const YARVINSN_trace_defined: ruby_vminsn_type = 146;
-pub const YARVINSN_trace_definedivar: ruby_vminsn_type = 147;
-pub const YARVINSN_trace_checkmatch: ruby_vminsn_type = 148;
-pub const YARVINSN_trace_checkkeyword: ruby_vminsn_type = 149;
-pub const YARVINSN_trace_checktype: ruby_vminsn_type = 150;
-pub const YARVINSN_trace_defineclass: ruby_vminsn_type = 151;
-pub const YARVINSN_trace_definemethod: ruby_vminsn_type = 152;
-pub const YARVINSN_trace_definesmethod: ruby_vminsn_type = 153;
-pub const YARVINSN_trace_send: ruby_vminsn_type = 154;
-pub const YARVINSN_trace_opt_send_without_block: ruby_vminsn_type = 155;
-pub const YARVINSN_trace_objtostring: ruby_vminsn_type = 156;
-pub const YARVINSN_trace_opt_str_freeze: ruby_vminsn_type = 157;
-pub const YARVINSN_trace_opt_nil_p: ruby_vminsn_type = 158;
-pub const YARVINSN_trace_opt_str_uminus: ruby_vminsn_type = 159;
-pub const YARVINSN_trace_opt_newarray_send: ruby_vminsn_type = 160;
-pub const YARVINSN_trace_invokesuper: ruby_vminsn_type = 161;
-pub const YARVINSN_trace_invokeblock: ruby_vminsn_type = 162;
-pub const YARVINSN_trace_leave: ruby_vminsn_type = 163;
-pub const YARVINSN_trace_throw: ruby_vminsn_type = 164;
-pub const YARVINSN_trace_jump: ruby_vminsn_type = 165;
-pub const YARVINSN_trace_branchif: ruby_vminsn_type = 166;
-pub const YARVINSN_trace_branchunless: ruby_vminsn_type = 167;
-pub const YARVINSN_trace_branchnil: ruby_vminsn_type = 168;
-pub const YARVINSN_trace_once: ruby_vminsn_type = 169;
-pub const YARVINSN_trace_opt_case_dispatch: ruby_vminsn_type = 170;
-pub const YARVINSN_trace_opt_plus: ruby_vminsn_type = 171;
-pub const YARVINSN_trace_opt_minus: ruby_vminsn_type = 172;
-pub const YARVINSN_trace_opt_mult: ruby_vminsn_type = 173;
-pub const YARVINSN_trace_opt_div: ruby_vminsn_type = 174;
-pub const YARVINSN_trace_opt_mod: ruby_vminsn_type = 175;
-pub const YARVINSN_trace_opt_eq: ruby_vminsn_type = 176;
-pub const YARVINSN_trace_opt_neq: ruby_vminsn_type = 177;
-pub const YARVINSN_trace_opt_lt: ruby_vminsn_type = 178;
-pub const YARVINSN_trace_opt_le: ruby_vminsn_type = 179;
-pub const YARVINSN_trace_opt_gt: ruby_vminsn_type = 180;
-pub const YARVINSN_trace_opt_ge: ruby_vminsn_type = 181;
-pub const YARVINSN_trace_opt_ltlt: ruby_vminsn_type = 182;
-pub const YARVINSN_trace_opt_and: ruby_vminsn_type = 183;
-pub const YARVINSN_trace_opt_or: ruby_vminsn_type = 184;
-pub const YARVINSN_trace_opt_aref: ruby_vminsn_type = 185;
-pub const YARVINSN_trace_opt_aset: ruby_vminsn_type = 186;
-pub const YARVINSN_trace_opt_aset_with: ruby_vminsn_type = 187;
-pub const YARVINSN_trace_opt_aref_with: ruby_vminsn_type = 188;
-pub const YARVINSN_trace_opt_length: ruby_vminsn_type = 189;
-pub const YARVINSN_trace_opt_size: ruby_vminsn_type = 190;
-pub const YARVINSN_trace_opt_empty_p: ruby_vminsn_type = 191;
-pub const YARVINSN_trace_opt_succ: ruby_vminsn_type = 192;
-pub const YARVINSN_trace_opt_not: ruby_vminsn_type = 193;
-pub const YARVINSN_trace_opt_regexpmatch2: ruby_vminsn_type = 194;
-pub const YARVINSN_trace_invokebuiltin: ruby_vminsn_type = 195;
-pub const YARVINSN_trace_opt_invokebuiltin_delegate: ruby_vminsn_type = 196;
-pub const YARVINSN_trace_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 197;
-pub const YARVINSN_trace_getlocal_WC_0: ruby_vminsn_type = 198;
-pub const YARVINSN_trace_getlocal_WC_1: ruby_vminsn_type = 199;
-pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 200;
-pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 201;
-pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 202;
-pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 203;
-pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 204;
+pub const YARVINSN_putchilledstring: ruby_vminsn_type = 22;
+pub const YARVINSN_concatstrings: ruby_vminsn_type = 23;
+pub const YARVINSN_anytostring: ruby_vminsn_type = 24;
+pub const YARVINSN_toregexp: ruby_vminsn_type = 25;
+pub const YARVINSN_intern: ruby_vminsn_type = 26;
+pub const YARVINSN_newarray: ruby_vminsn_type = 27;
+pub const YARVINSN_pushtoarraykwsplat: ruby_vminsn_type = 28;
+pub const YARVINSN_duparray: ruby_vminsn_type = 29;
+pub const YARVINSN_duphash: ruby_vminsn_type = 30;
+pub const YARVINSN_expandarray: ruby_vminsn_type = 31;
+pub const YARVINSN_concatarray: ruby_vminsn_type = 32;
+pub const YARVINSN_concattoarray: ruby_vminsn_type = 33;
+pub const YARVINSN_pushtoarray: ruby_vminsn_type = 34;
+pub const YARVINSN_splatarray: ruby_vminsn_type = 35;
+pub const YARVINSN_splatkw: ruby_vminsn_type = 36;
+pub const YARVINSN_newhash: ruby_vminsn_type = 37;
+pub const YARVINSN_newrange: ruby_vminsn_type = 38;
+pub const YARVINSN_pop: ruby_vminsn_type = 39;
+pub const YARVINSN_dup: ruby_vminsn_type = 40;
+pub const YARVINSN_dupn: ruby_vminsn_type = 41;
+pub const YARVINSN_swap: ruby_vminsn_type = 42;
+pub const YARVINSN_opt_reverse: ruby_vminsn_type = 43;
+pub const YARVINSN_topn: ruby_vminsn_type = 44;
+pub const YARVINSN_setn: ruby_vminsn_type = 45;
+pub const YARVINSN_adjuststack: ruby_vminsn_type = 46;
+pub const YARVINSN_defined: ruby_vminsn_type = 47;
+pub const YARVINSN_definedivar: ruby_vminsn_type = 48;
+pub const YARVINSN_checkmatch: ruby_vminsn_type = 49;
+pub const YARVINSN_checkkeyword: ruby_vminsn_type = 50;
+pub const YARVINSN_checktype: ruby_vminsn_type = 51;
+pub const YARVINSN_defineclass: ruby_vminsn_type = 52;
+pub const YARVINSN_definemethod: ruby_vminsn_type = 53;
+pub const YARVINSN_definesmethod: ruby_vminsn_type = 54;
+pub const YARVINSN_send: ruby_vminsn_type = 55;
+pub const YARVINSN_sendforward: ruby_vminsn_type = 56;
+pub const YARVINSN_opt_send_without_block: ruby_vminsn_type = 57;
+pub const YARVINSN_opt_new: ruby_vminsn_type = 58;
+pub const YARVINSN_objtostring: ruby_vminsn_type = 59;
+pub const YARVINSN_opt_ary_freeze: ruby_vminsn_type = 60;
+pub const YARVINSN_opt_hash_freeze: ruby_vminsn_type = 61;
+pub const YARVINSN_opt_str_freeze: ruby_vminsn_type = 62;
+pub const YARVINSN_opt_nil_p: ruby_vminsn_type = 63;
+pub const YARVINSN_opt_str_uminus: ruby_vminsn_type = 64;
+pub const YARVINSN_opt_duparray_send: ruby_vminsn_type = 65;
+pub const YARVINSN_opt_newarray_send: ruby_vminsn_type = 66;
+pub const YARVINSN_invokesuper: ruby_vminsn_type = 67;
+pub const YARVINSN_invokesuperforward: ruby_vminsn_type = 68;
+pub const YARVINSN_invokeblock: ruby_vminsn_type = 69;
+pub const YARVINSN_leave: ruby_vminsn_type = 70;
+pub const YARVINSN_throw: ruby_vminsn_type = 71;
+pub const YARVINSN_jump: ruby_vminsn_type = 72;
+pub const YARVINSN_branchif: ruby_vminsn_type = 73;
+pub const YARVINSN_branchunless: ruby_vminsn_type = 74;
+pub const YARVINSN_branchnil: ruby_vminsn_type = 75;
+pub const YARVINSN_once: ruby_vminsn_type = 76;
+pub const YARVINSN_opt_case_dispatch: ruby_vminsn_type = 77;
+pub const YARVINSN_opt_plus: ruby_vminsn_type = 78;
+pub const YARVINSN_opt_minus: ruby_vminsn_type = 79;
+pub const YARVINSN_opt_mult: ruby_vminsn_type = 80;
+pub const YARVINSN_opt_div: ruby_vminsn_type = 81;
+pub const YARVINSN_opt_mod: ruby_vminsn_type = 82;
+pub const YARVINSN_opt_eq: ruby_vminsn_type = 83;
+pub const YARVINSN_opt_neq: ruby_vminsn_type = 84;
+pub const YARVINSN_opt_lt: ruby_vminsn_type = 85;
+pub const YARVINSN_opt_le: ruby_vminsn_type = 86;
+pub const YARVINSN_opt_gt: ruby_vminsn_type = 87;
+pub const YARVINSN_opt_ge: ruby_vminsn_type = 88;
+pub const YARVINSN_opt_ltlt: ruby_vminsn_type = 89;
+pub const YARVINSN_opt_and: ruby_vminsn_type = 90;
+pub const YARVINSN_opt_or: ruby_vminsn_type = 91;
+pub const YARVINSN_opt_aref: ruby_vminsn_type = 92;
+pub const YARVINSN_opt_aset: ruby_vminsn_type = 93;
+pub const YARVINSN_opt_length: ruby_vminsn_type = 94;
+pub const YARVINSN_opt_size: ruby_vminsn_type = 95;
+pub const YARVINSN_opt_empty_p: ruby_vminsn_type = 96;
+pub const YARVINSN_opt_succ: ruby_vminsn_type = 97;
+pub const YARVINSN_opt_not: ruby_vminsn_type = 98;
+pub const YARVINSN_opt_regexpmatch2: ruby_vminsn_type = 99;
+pub const YARVINSN_invokebuiltin: ruby_vminsn_type = 100;
+pub const YARVINSN_opt_invokebuiltin_delegate: ruby_vminsn_type = 101;
+pub const YARVINSN_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 102;
+pub const YARVINSN_getlocal_WC_0: ruby_vminsn_type = 103;
+pub const YARVINSN_getlocal_WC_1: ruby_vminsn_type = 104;
+pub const YARVINSN_setlocal_WC_0: ruby_vminsn_type = 105;
+pub const YARVINSN_setlocal_WC_1: ruby_vminsn_type = 106;
+pub const YARVINSN_putobject_INT2FIX_0_: ruby_vminsn_type = 107;
+pub const YARVINSN_putobject_INT2FIX_1_: ruby_vminsn_type = 108;
+pub const YARVINSN_trace_nop: ruby_vminsn_type = 109;
+pub const YARVINSN_trace_getlocal: ruby_vminsn_type = 110;
+pub const YARVINSN_trace_setlocal: ruby_vminsn_type = 111;
+pub const YARVINSN_trace_getblockparam: ruby_vminsn_type = 112;
+pub const YARVINSN_trace_setblockparam: ruby_vminsn_type = 113;
+pub const YARVINSN_trace_getblockparamproxy: ruby_vminsn_type = 114;
+pub const YARVINSN_trace_getspecial: ruby_vminsn_type = 115;
+pub const YARVINSN_trace_setspecial: ruby_vminsn_type = 116;
+pub const YARVINSN_trace_getinstancevariable: ruby_vminsn_type = 117;
+pub const YARVINSN_trace_setinstancevariable: ruby_vminsn_type = 118;
+pub const YARVINSN_trace_getclassvariable: ruby_vminsn_type = 119;
+pub const YARVINSN_trace_setclassvariable: ruby_vminsn_type = 120;
+pub const YARVINSN_trace_opt_getconstant_path: ruby_vminsn_type = 121;
+pub const YARVINSN_trace_getconstant: ruby_vminsn_type = 122;
+pub const YARVINSN_trace_setconstant: ruby_vminsn_type = 123;
+pub const YARVINSN_trace_getglobal: ruby_vminsn_type = 124;
+pub const YARVINSN_trace_setglobal: ruby_vminsn_type = 125;
+pub const YARVINSN_trace_putnil: ruby_vminsn_type = 126;
+pub const YARVINSN_trace_putself: ruby_vminsn_type = 127;
+pub const YARVINSN_trace_putobject: ruby_vminsn_type = 128;
+pub const YARVINSN_trace_putspecialobject: ruby_vminsn_type = 129;
+pub const YARVINSN_trace_putstring: ruby_vminsn_type = 130;
+pub const YARVINSN_trace_putchilledstring: ruby_vminsn_type = 131;
+pub const YARVINSN_trace_concatstrings: ruby_vminsn_type = 132;
+pub const YARVINSN_trace_anytostring: ruby_vminsn_type = 133;
+pub const YARVINSN_trace_toregexp: ruby_vminsn_type = 134;
+pub const YARVINSN_trace_intern: ruby_vminsn_type = 135;
+pub const YARVINSN_trace_newarray: ruby_vminsn_type = 136;
+pub const YARVINSN_trace_pushtoarraykwsplat: ruby_vminsn_type = 137;
+pub const YARVINSN_trace_duparray: ruby_vminsn_type = 138;
+pub const YARVINSN_trace_duphash: ruby_vminsn_type = 139;
+pub const YARVINSN_trace_expandarray: ruby_vminsn_type = 140;
+pub const YARVINSN_trace_concatarray: ruby_vminsn_type = 141;
+pub const YARVINSN_trace_concattoarray: ruby_vminsn_type = 142;
+pub const YARVINSN_trace_pushtoarray: ruby_vminsn_type = 143;
+pub const YARVINSN_trace_splatarray: ruby_vminsn_type = 144;
+pub const YARVINSN_trace_splatkw: ruby_vminsn_type = 145;
+pub const YARVINSN_trace_newhash: ruby_vminsn_type = 146;
+pub const YARVINSN_trace_newrange: ruby_vminsn_type = 147;
+pub const YARVINSN_trace_pop: ruby_vminsn_type = 148;
+pub const YARVINSN_trace_dup: ruby_vminsn_type = 149;
+pub const YARVINSN_trace_dupn: ruby_vminsn_type = 150;
+pub const YARVINSN_trace_swap: ruby_vminsn_type = 151;
+pub const YARVINSN_trace_opt_reverse: ruby_vminsn_type = 152;
+pub const YARVINSN_trace_topn: ruby_vminsn_type = 153;
+pub const YARVINSN_trace_setn: ruby_vminsn_type = 154;
+pub const YARVINSN_trace_adjuststack: ruby_vminsn_type = 155;
+pub const YARVINSN_trace_defined: ruby_vminsn_type = 156;
+pub const YARVINSN_trace_definedivar: ruby_vminsn_type = 157;
+pub const YARVINSN_trace_checkmatch: ruby_vminsn_type = 158;
+pub const YARVINSN_trace_checkkeyword: ruby_vminsn_type = 159;
+pub const YARVINSN_trace_checktype: ruby_vminsn_type = 160;
+pub const YARVINSN_trace_defineclass: ruby_vminsn_type = 161;
+pub const YARVINSN_trace_definemethod: ruby_vminsn_type = 162;
+pub const YARVINSN_trace_definesmethod: ruby_vminsn_type = 163;
+pub const YARVINSN_trace_send: ruby_vminsn_type = 164;
+pub const YARVINSN_trace_sendforward: ruby_vminsn_type = 165;
+pub const YARVINSN_trace_opt_send_without_block: ruby_vminsn_type = 166;
+pub const YARVINSN_trace_opt_new: ruby_vminsn_type = 167;
+pub const YARVINSN_trace_objtostring: ruby_vminsn_type = 168;
+pub const YARVINSN_trace_opt_ary_freeze: ruby_vminsn_type = 169;
+pub const YARVINSN_trace_opt_hash_freeze: ruby_vminsn_type = 170;
+pub const YARVINSN_trace_opt_str_freeze: ruby_vminsn_type = 171;
+pub const YARVINSN_trace_opt_nil_p: ruby_vminsn_type = 172;
+pub const YARVINSN_trace_opt_str_uminus: ruby_vminsn_type = 173;
+pub const YARVINSN_trace_opt_duparray_send: ruby_vminsn_type = 174;
+pub const YARVINSN_trace_opt_newarray_send: ruby_vminsn_type = 175;
+pub const YARVINSN_trace_invokesuper: ruby_vminsn_type = 176;
+pub const YARVINSN_trace_invokesuperforward: ruby_vminsn_type = 177;
+pub const YARVINSN_trace_invokeblock: ruby_vminsn_type = 178;
+pub const YARVINSN_trace_leave: ruby_vminsn_type = 179;
+pub const YARVINSN_trace_throw: ruby_vminsn_type = 180;
+pub const YARVINSN_trace_jump: ruby_vminsn_type = 181;
+pub const YARVINSN_trace_branchif: ruby_vminsn_type = 182;
+pub const YARVINSN_trace_branchunless: ruby_vminsn_type = 183;
+pub const YARVINSN_trace_branchnil: ruby_vminsn_type = 184;
+pub const YARVINSN_trace_once: ruby_vminsn_type = 185;
+pub const YARVINSN_trace_opt_case_dispatch: ruby_vminsn_type = 186;
+pub const YARVINSN_trace_opt_plus: ruby_vminsn_type = 187;
+pub const YARVINSN_trace_opt_minus: ruby_vminsn_type = 188;
+pub const YARVINSN_trace_opt_mult: ruby_vminsn_type = 189;
+pub const YARVINSN_trace_opt_div: ruby_vminsn_type = 190;
+pub const YARVINSN_trace_opt_mod: ruby_vminsn_type = 191;
+pub const YARVINSN_trace_opt_eq: ruby_vminsn_type = 192;
+pub const YARVINSN_trace_opt_neq: ruby_vminsn_type = 193;
+pub const YARVINSN_trace_opt_lt: ruby_vminsn_type = 194;
+pub const YARVINSN_trace_opt_le: ruby_vminsn_type = 195;
+pub const YARVINSN_trace_opt_gt: ruby_vminsn_type = 196;
+pub const YARVINSN_trace_opt_ge: ruby_vminsn_type = 197;
+pub const YARVINSN_trace_opt_ltlt: ruby_vminsn_type = 198;
+pub const YARVINSN_trace_opt_and: ruby_vminsn_type = 199;
+pub const YARVINSN_trace_opt_or: ruby_vminsn_type = 200;
+pub const YARVINSN_trace_opt_aref: ruby_vminsn_type = 201;
+pub const YARVINSN_trace_opt_aset: ruby_vminsn_type = 202;
+pub const YARVINSN_trace_opt_length: ruby_vminsn_type = 203;
+pub const YARVINSN_trace_opt_size: ruby_vminsn_type = 204;
+pub const YARVINSN_trace_opt_empty_p: ruby_vminsn_type = 205;
+pub const YARVINSN_trace_opt_succ: ruby_vminsn_type = 206;
+pub const YARVINSN_trace_opt_not: ruby_vminsn_type = 207;
+pub const YARVINSN_trace_opt_regexpmatch2: ruby_vminsn_type = 208;
+pub const YARVINSN_trace_invokebuiltin: ruby_vminsn_type = 209;
+pub const YARVINSN_trace_opt_invokebuiltin_delegate: ruby_vminsn_type = 210;
+pub const YARVINSN_trace_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 211;
+pub const YARVINSN_trace_getlocal_WC_0: ruby_vminsn_type = 212;
+pub const YARVINSN_trace_getlocal_WC_1: ruby_vminsn_type = 213;
+pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 214;
+pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 215;
+pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 216;
+pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 217;
+pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 218;
+pub const YARVINSN_zjit_setinstancevariable: ruby_vminsn_type = 219;
+pub const YARVINSN_zjit_definedivar: ruby_vminsn_type = 220;
+pub const YARVINSN_zjit_send: ruby_vminsn_type = 221;
+pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 222;
+pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 223;
+pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 224;
+pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 225;
+pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 226;
+pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 227;
+pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 228;
+pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 229;
+pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 230;
+pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 231;
+pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 232;
+pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 233;
+pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 234;
+pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 235;
+pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 236;
+pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 237;
+pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 238;
+pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 239;
+pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 240;
+pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 241;
+pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 242;
+pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 243;
+pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 244;
+pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 245;
+pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 246;
+pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 247;
+pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 248;
+pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 249;
pub type ruby_vminsn_type = u32;
pub type rb_iseq_callback = ::std::option::Option<
unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void),
>;
-pub const ROBJECT_OFFSET_AS_HEAP_IVPTR: robject_offsets = 16;
-pub const ROBJECT_OFFSET_AS_HEAP_IV_INDEX_TBL: robject_offsets = 24;
-pub const ROBJECT_OFFSET_AS_ARY: robject_offsets = 16;
-pub type robject_offsets = u32;
-pub const RUBY_OFFSET_RSTRING_LEN: rstring_offsets = 16;
-pub type rstring_offsets = u32;
-pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword;
+pub const DEFINED_NOT_DEFINED: defined_type = 0;
+pub const DEFINED_NIL: defined_type = 1;
+pub const DEFINED_IVAR: defined_type = 2;
+pub const DEFINED_LVAR: defined_type = 3;
+pub const DEFINED_GVAR: defined_type = 4;
+pub const DEFINED_CVAR: defined_type = 5;
+pub const DEFINED_CONST: defined_type = 6;
+pub const DEFINED_METHOD: defined_type = 7;
+pub const DEFINED_YIELD: defined_type = 8;
+pub const DEFINED_ZSUPER: defined_type = 9;
+pub const DEFINED_SELF: defined_type = 10;
+pub const DEFINED_TRUE: defined_type = 11;
+pub const DEFINED_FALSE: defined_type = 12;
+pub const DEFINED_ASGN: defined_type = 13;
+pub const DEFINED_EXPR: defined_type = 14;
+pub const DEFINED_REF: defined_type = 15;
+pub const DEFINED_FUNC: defined_type = 16;
+pub const DEFINED_CONST_FROM: defined_type = 17;
+pub type defined_type = u32;
+pub type rb_seq_param_keyword_struct =
+ rb_iseq_constant_body_rb_iseq_parameters_rb_iseq_param_keyword;
+pub const ROBJECT_OFFSET_AS_HEAP_FIELDS: jit_bindgen_constants = 16;
+pub const ROBJECT_OFFSET_AS_ARY: jit_bindgen_constants = 16;
+pub const RUBY_OFFSET_RSTRING_LEN: jit_bindgen_constants = 16;
+pub const RUBY_OFFSET_EC_CFP: jit_bindgen_constants = 16;
+pub const RUBY_OFFSET_EC_INTERRUPT_FLAG: jit_bindgen_constants = 32;
+pub const RUBY_OFFSET_EC_INTERRUPT_MASK: jit_bindgen_constants = 36;
+pub const RUBY_OFFSET_EC_THREAD_PTR: jit_bindgen_constants = 48;
+pub const RUBY_OFFSET_EC_RACTOR_ID: jit_bindgen_constants = 64;
+pub type jit_bindgen_constants = u32;
+pub type rb_iseq_param_keyword_struct =
+ rb_iseq_constant_body_rb_iseq_parameters_rb_iseq_param_keyword;
extern "C" {
+ pub fn ruby_xfree(ptr: *mut ::std::os::raw::c_void);
pub fn rb_class_attached_object(klass: VALUE) -> VALUE;
pub fn rb_singleton_class(obj: VALUE) -> VALUE;
pub fn rb_get_alloc_func(klass: VALUE) -> rb_alloc_func_t;
pub fn rb_method_basic_definition_p(klass: VALUE, mid: ID) -> ::std::os::raw::c_int;
pub fn rb_bug(fmt: *const ::std::os::raw::c_char, ...) -> !;
+ pub fn rb_float_new(d: f64) -> VALUE;
pub fn rb_gc_mark(obj: VALUE);
pub fn rb_gc_mark_movable(obj: VALUE);
pub fn rb_gc_location(obj: VALUE) -> VALUE;
pub fn rb_gc_writebarrier(old: VALUE, young: VALUE);
pub fn rb_class_get_superclass(klass: VALUE) -> VALUE;
+ pub fn rb_funcall(recv: VALUE, mid: ID, n: ::std::os::raw::c_int, ...) -> VALUE;
pub static mut rb_mKernel: VALUE;
pub static mut rb_cBasicObject: VALUE;
pub static mut rb_cArray: VALUE;
+ pub static mut rb_cClass: VALUE;
pub static mut rb_cFalseClass: VALUE;
pub static mut rb_cFloat: VALUE;
pub static mut rb_cHash: VALUE;
@@ -903,6 +1010,7 @@ extern "C" {
pub static mut rb_cInteger: VALUE;
pub static mut rb_cModule: VALUE;
pub static mut rb_cNilClass: VALUE;
+ pub static mut rb_cNumeric: VALUE;
pub static mut rb_cString: VALUE;
pub static mut rb_cSymbol: VALUE;
pub static mut rb_cThread: VALUE;
@@ -912,6 +1020,7 @@ extern "C" {
pub fn rb_ary_store(ary: VALUE, key: ::std::os::raw::c_long, val: VALUE);
pub fn rb_ary_dup(ary: VALUE) -> VALUE;
pub fn rb_ary_resurrect(ary: VALUE) -> VALUE;
+ pub fn rb_ary_cat(ary: VALUE, train: *const VALUE, len: ::std::os::raw::c_long) -> VALUE;
pub fn rb_ary_push(ary: VALUE, elem: VALUE) -> VALUE;
pub fn rb_ary_clear(ary: VALUE) -> VALUE;
pub fn rb_hash_new() -> VALUE;
@@ -925,7 +1034,13 @@ extern "C" {
pub fn rb_intern2(name: *const ::std::os::raw::c_char, len: ::std::os::raw::c_long) -> ID;
pub fn rb_id2name(id: ID) -> *const ::std::os::raw::c_char;
pub fn rb_class2name(klass: VALUE) -> *const ::std::os::raw::c_char;
+ pub fn rb_class_new_instance_pass_kw(
+ argc: ::std::os::raw::c_int,
+ argv: *const VALUE,
+ klass: VALUE,
+ ) -> VALUE;
pub fn rb_obj_is_kind_of(obj: VALUE, klass: VALUE) -> VALUE;
+ pub fn rb_obj_alloc(klass: VALUE) -> VALUE;
pub fn rb_obj_frozen_p(obj: VALUE) -> VALUE;
pub fn rb_backref_get() -> VALUE;
pub fn rb_range_new(beg: VALUE, end: VALUE, excl: ::std::os::raw::c_int) -> VALUE;
@@ -941,10 +1056,14 @@ extern "C" {
pub fn rb_str_buf_append(dst: VALUE, src: VALUE) -> VALUE;
pub fn rb_str_dup(str_: VALUE) -> VALUE;
pub fn rb_str_intern(str_: VALUE) -> VALUE;
+ pub fn rb_mod_name(mod_: VALUE) -> VALUE;
pub fn rb_ivar_get(obj: VALUE, name: ID) -> VALUE;
pub fn rb_ivar_defined(obj: VALUE, name: ID) -> VALUE;
pub fn rb_attr_get(obj: VALUE, name: ID) -> VALUE;
+ pub fn rb_const_get(space: VALUE, name: ID) -> VALUE;
pub fn rb_obj_info_dump(obj: VALUE);
+ pub fn rb_class_allocate_instance(klass: VALUE) -> VALUE;
+ pub fn rb_obj_equal(obj1: VALUE, obj2: VALUE) -> VALUE;
pub fn rb_reg_new_ary(ary: VALUE, options: ::std::os::raw::c_int) -> VALUE;
pub fn rb_ary_tmp_new_from_values(
arg1: VALUE,
@@ -956,13 +1075,15 @@ extern "C" {
n: ::std::os::raw::c_long,
elts: *const VALUE,
) -> VALUE;
- pub static mut rb_vm_insns_count: u64;
+ pub fn rb_vm_top_self() -> VALUE;
+ pub static mut rb_vm_insn_count: u64;
pub fn rb_method_entry_at(obj: VALUE, id: ID) -> *const rb_method_entry_t;
pub fn rb_callable_method_entry(klass: VALUE, id: ID) -> *const rb_callable_method_entry_t;
pub fn rb_callable_method_entry_or_negative(
klass: VALUE,
id: ID,
) -> *const rb_callable_method_entry_t;
+ pub static mut rb_cRubyVM: VALUE;
pub static mut rb_mRubyVMFrozenCore: VALUE;
pub static mut rb_block_param_proxy: VALUE;
pub fn rb_vm_ep_local_ep(ep: *const VALUE) -> *const VALUE;
@@ -973,22 +1094,36 @@ extern "C" {
cfp: *const rb_control_frame_t,
) -> *const rb_callable_method_entry_t;
pub fn rb_obj_info(obj: VALUE) -> *const ::std::os::raw::c_char;
- pub fn rb_class_allocate_instance(klass: VALUE) -> VALUE;
pub fn rb_ec_stack_check(ec: *mut rb_execution_context_struct) -> ::std::os::raw::c_int;
pub fn rb_shape_id_offset() -> i32;
- pub fn rb_shape_get_shape_by_id(shape_id: shape_id_t) -> *mut rb_shape_t;
- pub fn rb_shape_get_shape_id(obj: VALUE) -> shape_id_t;
- pub fn rb_shape_get_iv_index(shape: *mut rb_shape_t, id: ID, value: *mut attr_index_t) -> bool;
- pub fn rb_shape_obj_too_complex(obj: VALUE) -> bool;
- pub fn rb_shape_get_next(shape: *mut rb_shape_t, obj: VALUE, id: ID) -> *mut rb_shape_t;
- pub fn rb_shape_id(shape: *mut rb_shape_t) -> shape_id_t;
+ pub fn rb_obj_shape_id(obj: VALUE) -> shape_id_t;
+ pub fn rb_shape_get_iv_index(shape_id: shape_id_t, id: ID, value: *mut attr_index_t) -> bool;
+ pub fn rb_shape_transition_add_ivar_no_warnings(
+ klass: VALUE,
+ original_shape_id: shape_id_t,
+ id: ID,
+ ) -> shape_id_t;
+ pub fn rb_ivar_get_at(obj: VALUE, index: attr_index_t, id: ID) -> VALUE;
+ pub fn rb_ivar_get_at_no_ractor_check(obj: VALUE, index: attr_index_t) -> VALUE;
pub fn rb_gvar_get(arg1: ID) -> VALUE;
pub fn rb_gvar_set(arg1: ID, arg2: VALUE) -> VALUE;
- pub fn rb_ensure_iv_list_size(obj: VALUE, len: u32, newsize: u32);
+ pub fn rb_ensure_iv_list_size(obj: VALUE, current_len: u32, newsize: u32);
pub fn rb_vm_barrier();
+ pub fn rb_str_byte_substr(str_: VALUE, beg: VALUE, len: VALUE) -> VALUE;
+ pub fn rb_str_substr_two_fixnums(
+ str_: VALUE,
+ beg: VALUE,
+ len: VALUE,
+ empty: ::std::os::raw::c_int,
+ ) -> VALUE;
pub fn rb_obj_as_string_result(str_: VALUE, obj: VALUE) -> VALUE;
pub fn rb_str_concat_literals(num: usize, strary: *const VALUE) -> VALUE;
- pub fn rb_ec_str_resurrect(ec: *mut rb_execution_context_struct, str_: VALUE) -> VALUE;
+ pub fn rb_ec_str_resurrect(
+ ec: *mut rb_execution_context_struct,
+ str_: VALUE,
+ chilled: bool,
+ ) -> VALUE;
+ pub fn rb_to_hash_type(obj: VALUE) -> VALUE;
pub fn rb_hash_stlike_foreach(
hash: VALUE,
func: st_foreach_callback_func,
@@ -1003,6 +1138,10 @@ extern "C" {
) -> ::std::os::raw::c_int;
pub fn rb_insn_len(insn: VALUE) -> ::std::os::raw::c_int;
pub fn rb_vm_insn_decode(encoded: VALUE) -> ::std::os::raw::c_int;
+ pub fn rb_float_plus(x: VALUE, y: VALUE) -> VALUE;
+ pub fn rb_float_minus(x: VALUE, y: VALUE) -> VALUE;
+ pub fn rb_float_mul(x: VALUE, y: VALUE) -> VALUE;
+ pub fn rb_float_div(x: VALUE, y: VALUE) -> VALUE;
pub fn rb_fix_aref(fix: VALUE, idx: VALUE) -> VALUE;
pub fn rb_vm_insn_addr2opcode(addr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int;
pub fn rb_iseq_line_no(iseq: *const rb_iseq_t, pos: usize) -> ::std::os::raw::c_uint;
@@ -1015,32 +1154,58 @@ extern "C" {
lines: *mut ::std::os::raw::c_int,
) -> ::std::os::raw::c_int;
pub fn rb_jit_cont_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void);
- pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool;
- pub fn rb_yjit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32);
- pub fn rb_yjit_mark_unused(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool;
- pub fn rb_yjit_array_len(a: VALUE) -> ::std::os::raw::c_long;
- pub fn rb_yjit_icache_invalidate(
- start: *mut ::std::os::raw::c_void,
- end: *mut ::std::os::raw::c_void,
- );
pub fn rb_yjit_exit_locations_dict(
yjit_raw_samples: *mut VALUE,
yjit_line_samples: *mut ::std::os::raw::c_int,
samples_len: ::std::os::raw::c_int,
) -> VALUE;
- pub fn rb_yjit_get_page_size() -> u32;
- pub fn rb_yjit_reserve_addr_space(mem_size: u32) -> *mut u8;
pub fn rb_c_method_tracing_currently_enabled(ec: *const rb_execution_context_t) -> bool;
pub fn rb_full_cfunc_return(ec: *mut rb_execution_context_t, return_value: VALUE);
- pub fn rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
pub fn rb_iseq_get_yjit_payload(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_void;
pub fn rb_iseq_set_yjit_payload(iseq: *const rb_iseq_t, payload: *mut ::std::os::raw::c_void);
- pub fn rb_iseq_reset_jit_func(iseq: *const rb_iseq_t);
+ pub fn rb_get_symbol_id(namep: VALUE) -> ID;
+ pub fn rb_yjit_builtin_function(iseq: *const rb_iseq_t) -> *const rb_builtin_function;
+ pub fn rb_yjit_str_simple_append(str1: VALUE, str2: VALUE) -> VALUE;
+ pub fn rb_vm_base_ptr(cfp: *mut rb_control_frame_struct) -> *mut VALUE;
+ pub fn rb_str_neq_internal(str1: VALUE, str2: VALUE) -> VALUE;
+ pub fn rb_ary_unshift_m(argc: ::std::os::raw::c_int, argv: *mut VALUE, ary: VALUE) -> VALUE;
+ pub fn rb_yjit_rb_ary_subseq_length(ary: VALUE, beg: ::std::os::raw::c_long) -> VALUE;
+ pub fn rb_yjit_ruby2_keywords_splat_p(obj: VALUE) -> usize;
+ pub fn rb_yjit_splat_varg_checks(
+ sp: *mut VALUE,
+ splat_array: VALUE,
+ cfp: *mut rb_control_frame_t,
+ ) -> VALUE;
+ pub fn rb_yjit_splat_varg_cfunc(stack_splat_array: *mut VALUE) -> ::std::os::raw::c_int;
+ pub fn rb_yjit_dump_iseq_loc(iseq: *const rb_iseq_t, insn_idx: u32);
+ pub fn rb_yjit_iseq_inspect(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_char;
+ pub fn rb_RSTRUCT_SET(st: VALUE, k: ::std::os::raw::c_int, v: VALUE);
+ pub fn rb_ENCODING_GET(obj: VALUE) -> ::std::os::raw::c_int;
+ pub fn rb_yjit_constcache_shareable(ice: *const iseq_inline_constant_cache_entry) -> bool;
+ pub fn rb_yjit_obj_written(
+ old: VALUE,
+ young: VALUE,
+ file: *const ::std::os::raw::c_char,
+ line: ::std::os::raw::c_int,
+ );
+ pub fn rb_object_shape_count() -> VALUE;
+ pub fn rb_yjit_shape_obj_too_complex_p(obj: VALUE) -> bool;
+ pub fn rb_yjit_shape_capacity(shape_id: shape_id_t) -> attr_index_t;
+ pub fn rb_yjit_shape_index(shape_id: shape_id_t) -> attr_index_t;
+ pub fn rb_yjit_sendish_sp_pops(ci: *const rb_callinfo) -> usize;
+ pub fn rb_yjit_invokeblock_sp_pops(ci: *const rb_callinfo) -> usize;
+ pub fn rb_yjit_cme_ractor_serial(cme: *const rb_callable_method_entry_t) -> rb_serial_t;
+ pub fn rb_yjit_set_exception_return(
+ cfp: *mut rb_control_frame_t,
+ leave_exit: *mut ::std::os::raw::c_void,
+ leave_exception: *mut ::std::os::raw::c_void,
+ );
+ pub fn rb_vm_instruction_size() -> u32;
+ pub fn rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
pub fn rb_iseq_pc_at_idx(iseq: *const rb_iseq_t, insn_idx: u32) -> *mut VALUE;
pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int;
pub fn rb_RSTRING_LEN(str_: VALUE) -> ::std::os::raw::c_ulong;
pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char;
- pub fn rb_yjit_get_proc_ptr(procv: VALUE) -> *mut rb_proc_t;
pub fn rb_insn_name(insn: VALUE) -> *const ::std::os::raw::c_char;
pub fn rb_vm_ci_argc(ci: *const rb_callinfo) -> ::std::os::raw::c_uint;
pub fn rb_vm_ci_mid(ci: *const rb_callinfo) -> ID;
@@ -1054,7 +1219,6 @@ extern "C" {
pub fn rb_METHOD_ENTRY_VISI(me: *const rb_callable_method_entry_t) -> rb_method_visibility_t;
pub fn rb_get_cme_def_type(cme: *const rb_callable_method_entry_t) -> rb_method_type_t;
pub fn rb_get_cme_def_body_attr_id(cme: *const rb_callable_method_entry_t) -> ID;
- pub fn rb_get_symbol_id(namep: VALUE) -> ID;
pub fn rb_get_cme_def_body_optimized_type(
cme: *const rb_callable_method_entry_t,
) -> method_optimized_type;
@@ -1066,98 +1230,93 @@ extern "C" {
) -> *mut rb_method_cfunc_t;
pub fn rb_get_def_method_serial(def: *const rb_method_definition_t) -> usize;
pub fn rb_get_def_original_id(def: *const rb_method_definition_t) -> ID;
+ pub fn rb_get_def_bmethod_proc(def: *mut rb_method_definition_t) -> VALUE;
+ pub fn rb_jit_get_proc_ptr(procv: VALUE) -> *mut rb_proc_t;
+ pub fn rb_optimized_call(
+ recv: *mut VALUE,
+ ec: *mut rb_execution_context_t,
+ argc: ::std::os::raw::c_int,
+ argv: *mut VALUE,
+ kw_splat: ::std::os::raw::c_int,
+ block_handler: VALUE,
+ ) -> VALUE;
+ pub fn rb_jit_iseq_builtin_attrs(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
pub fn rb_get_mct_argc(mct: *const rb_method_cfunc_t) -> ::std::os::raw::c_int;
pub fn rb_get_mct_func(mct: *const rb_method_cfunc_t) -> *mut ::std::os::raw::c_void;
pub fn rb_get_def_iseq_ptr(def: *mut rb_method_definition_t) -> *const rb_iseq_t;
- pub fn rb_get_def_bmethod_proc(def: *mut rb_method_definition_t) -> VALUE;
pub fn rb_get_iseq_body_local_iseq(iseq: *const rb_iseq_t) -> *const rb_iseq_t;
pub fn rb_get_iseq_body_parent_iseq(iseq: *const rb_iseq_t) -> *const rb_iseq_t;
pub fn rb_get_iseq_body_local_table_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
pub fn rb_get_iseq_body_iseq_encoded(iseq: *const rb_iseq_t) -> *mut VALUE;
pub fn rb_get_iseq_body_stack_max(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
+ pub fn rb_get_iseq_body_type(iseq: *const rb_iseq_t) -> rb_iseq_type;
pub fn rb_get_iseq_flags_has_lead(iseq: *const rb_iseq_t) -> bool;
pub fn rb_get_iseq_flags_has_opt(iseq: *const rb_iseq_t) -> bool;
pub fn rb_get_iseq_flags_has_kw(iseq: *const rb_iseq_t) -> bool;
pub fn rb_get_iseq_flags_has_post(iseq: *const rb_iseq_t) -> bool;
pub fn rb_get_iseq_flags_has_kwrest(iseq: *const rb_iseq_t) -> bool;
+ pub fn rb_get_iseq_flags_anon_kwrest(iseq: *const rb_iseq_t) -> bool;
pub fn rb_get_iseq_flags_has_rest(iseq: *const rb_iseq_t) -> bool;
pub fn rb_get_iseq_flags_ruby2_keywords(iseq: *const rb_iseq_t) -> bool;
pub fn rb_get_iseq_flags_has_block(iseq: *const rb_iseq_t) -> bool;
pub fn rb_get_iseq_flags_ambiguous_param0(iseq: *const rb_iseq_t) -> bool;
pub fn rb_get_iseq_flags_accepts_no_kwarg(iseq: *const rb_iseq_t) -> bool;
+ pub fn rb_get_iseq_flags_forwardable(iseq: *const rb_iseq_t) -> bool;
pub fn rb_get_iseq_body_param_keyword(
iseq: *const rb_iseq_t,
- ) -> *const rb_seq_param_keyword_struct;
+ ) -> *const rb_iseq_param_keyword_struct;
pub fn rb_get_iseq_body_param_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
pub fn rb_get_iseq_body_param_lead_num(iseq: *const rb_iseq_t) -> ::std::os::raw::c_int;
pub fn rb_get_iseq_body_param_opt_num(iseq: *const rb_iseq_t) -> ::std::os::raw::c_int;
pub fn rb_get_iseq_body_param_opt_table(iseq: *const rb_iseq_t) -> *const VALUE;
- pub fn rb_optimized_call(
- recv: *mut VALUE,
- ec: *mut rb_execution_context_t,
- argc: ::std::os::raw::c_int,
- argv: *mut VALUE,
- kw_splat: ::std::os::raw::c_int,
- block_handler: VALUE,
- ) -> VALUE;
- pub fn rb_yjit_iseq_builtin_attrs(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
- pub fn rb_yjit_builtin_function(iseq: *const rb_iseq_t) -> *const rb_builtin_function;
- pub fn rb_yjit_str_simple_append(str1: VALUE, str2: VALUE) -> VALUE;
pub fn rb_get_ec_cfp(ec: *const rb_execution_context_t) -> *mut rb_control_frame_struct;
pub fn rb_get_cfp_iseq(cfp: *mut rb_control_frame_struct) -> *const rb_iseq_t;
pub fn rb_get_cfp_pc(cfp: *mut rb_control_frame_struct) -> *mut VALUE;
pub fn rb_get_cfp_sp(cfp: *mut rb_control_frame_struct) -> *mut VALUE;
- pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE);
- pub fn rb_set_cfp_sp(cfp: *mut rb_control_frame_struct, sp: *mut VALUE);
pub fn rb_get_cfp_self(cfp: *mut rb_control_frame_struct) -> VALUE;
pub fn rb_get_cfp_ep(cfp: *mut rb_control_frame_struct) -> *mut VALUE;
pub fn rb_get_cfp_ep_level(cfp: *mut rb_control_frame_struct, lv: u32) -> *const VALUE;
- pub fn rb_vm_base_ptr(cfp: *mut rb_control_frame_struct) -> *mut VALUE;
pub fn rb_yarv_class_of(obj: VALUE) -> VALUE;
- pub fn rb_yarv_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE;
- pub fn rb_str_neq_internal(str1: VALUE, str2: VALUE) -> VALUE;
- pub fn rb_yarv_ary_entry_internal(ary: VALUE, offset: ::std::os::raw::c_long) -> VALUE;
- pub fn rb_ary_unshift_m(argc: ::std::os::raw::c_int, argv: *mut VALUE, ary: VALUE) -> VALUE;
- pub fn rb_yjit_rb_ary_subseq_length(ary: VALUE, beg: ::std::os::raw::c_long) -> VALUE;
- pub fn rb_yjit_fix_div_fix(recv: VALUE, obj: VALUE) -> VALUE;
- pub fn rb_yjit_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE;
- pub fn rb_yjit_dump_iseq_loc(iseq: *const rb_iseq_t, insn_idx: u32);
pub fn rb_FL_TEST(obj: VALUE, flags: VALUE) -> VALUE;
pub fn rb_FL_TEST_RAW(obj: VALUE, flags: VALUE) -> VALUE;
pub fn rb_RB_TYPE_P(obj: VALUE, t: ruby_value_type) -> bool;
pub fn rb_RSTRUCT_LEN(st: VALUE) -> ::std::os::raw::c_long;
- pub fn rb_RSTRUCT_SET(st: VALUE, k: ::std::os::raw::c_int, v: VALUE);
pub fn rb_get_call_data_ci(cd: *const rb_call_data) -> *const rb_callinfo;
pub fn rb_BASIC_OP_UNREDEFINED_P(bop: ruby_basic_operators, klass: u32) -> bool;
pub fn rb_RCLASS_ORIGIN(c: VALUE) -> VALUE;
- pub fn rb_ENCODING_GET(obj: VALUE) -> ::std::os::raw::c_int;
- pub fn rb_yjit_multi_ractor_p() -> bool;
pub fn rb_assert_iseq_handle(handle: VALUE);
+ pub fn rb_assert_holding_vm_lock();
pub fn rb_IMEMO_TYPE_P(imemo: VALUE, imemo_type: imemo_type) -> ::std::os::raw::c_int;
pub fn rb_assert_cme_handle(handle: VALUE);
- pub fn rb_yjit_for_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void);
- pub fn rb_yjit_obj_written(
- old: VALUE,
- young: VALUE,
- file: *const ::std::os::raw::c_char,
- line: ::std::os::raw::c_int,
- );
- pub fn rb_yjit_vm_lock_then_barrier(
+ pub fn rb_yarv_ary_entry_internal(ary: VALUE, offset: ::std::os::raw::c_long) -> VALUE;
+ pub fn rb_jit_array_len(a: VALUE) -> ::std::os::raw::c_long;
+ pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE);
+ pub fn rb_set_cfp_sp(cfp: *mut rb_control_frame_struct, sp: *mut VALUE);
+ pub fn rb_jit_shape_too_complex_p(shape_id: shape_id_t) -> bool;
+ pub fn rb_jit_multi_ractor_p() -> bool;
+ pub fn rb_jit_vm_lock_then_barrier(
recursive_lock_level: *mut ::std::os::raw::c_uint,
file: *const ::std::os::raw::c_char,
line: ::std::os::raw::c_int,
);
- pub fn rb_yjit_vm_unlock(
+ pub fn rb_jit_vm_unlock(
recursive_lock_level: *mut ::std::os::raw::c_uint,
file: *const ::std::os::raw::c_char,
line: ::std::os::raw::c_int,
);
- pub fn rb_yjit_assert_holding_vm_lock();
- pub fn rb_yjit_sendish_sp_pops(ci: *const rb_callinfo) -> usize;
- pub fn rb_yjit_invokeblock_sp_pops(ci: *const rb_callinfo) -> usize;
- pub fn rb_yjit_set_exception_return(
- cfp: *mut rb_control_frame_t,
- leave_exit: *mut ::std::os::raw::c_void,
- leave_exception: *mut ::std::os::raw::c_void,
+ pub fn rb_iseq_reset_jit_func(iseq: *const rb_iseq_t);
+ pub fn rb_jit_get_page_size() -> u32;
+ pub fn rb_jit_reserve_addr_space(mem_size: u32) -> *mut u8;
+ pub fn rb_jit_for_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void);
+ pub fn rb_jit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool;
+ pub fn rb_jit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32);
+ pub fn rb_jit_mark_unused(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool;
+ pub fn rb_jit_icache_invalidate(
+ start: *mut ::std::os::raw::c_void,
+ end: *mut ::std::os::raw::c_void,
);
+ pub fn rb_jit_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE;
+ pub fn rb_jit_fix_div_fix(recv: VALUE, obj: VALUE) -> VALUE;
+ pub fn rb_yarv_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE;
+ pub fn rb_jit_str_concat_codepoint(str_: VALUE, codepoint: VALUE);
}
diff --git a/yjit/src/disasm.rs b/yjit/src/disasm.rs
index 7875276815..4f85937ee9 100644
--- a/yjit/src/disasm.rs
+++ b/yjit/src/disasm.rs
@@ -1,16 +1,44 @@
use crate::core::*;
use crate::cruby::*;
use crate::yjit::yjit_enabled_p;
-#[cfg(feature = "disasm")]
use crate::asm::CodeBlock;
-#[cfg(feature = "disasm")]
use crate::codegen::CodePtr;
-#[cfg(feature = "disasm")]
use crate::options::DumpDisasm;
-#[cfg(feature = "disasm")]
use std::fmt::Write;
+#[cfg_attr(not(feature = "disasm"), allow(dead_code))]
+#[derive(Copy, Clone, Debug)]
+pub struct TerminalColor {
+ pub blue_begin: &'static str,
+ pub blue_end: &'static str,
+ pub bold_begin: &'static str,
+ pub bold_end: &'static str,
+}
+
+pub static TTY_TERMINAL_COLOR: TerminalColor = TerminalColor {
+ blue_begin: "\x1b[34m",
+ blue_end: "\x1b[0m",
+ bold_begin: "\x1b[1m",
+ bold_end: "\x1b[22m",
+};
+
+pub static NON_TTY_TERMINAL_COLOR: TerminalColor = TerminalColor {
+ blue_begin: "",
+ blue_end: "",
+ bold_begin: "",
+ bold_end: "",
+};
+
+/// Terminal escape codes for colors, font weight, etc. Only enabled if stdout is a TTY.
+pub fn get_colors() -> &'static TerminalColor {
+ if crate::utils::stdout_supports_colors() {
+ &TTY_TERMINAL_COLOR
+ } else {
+ &NON_TTY_TERMINAL_COLOR
+ }
+}
+
/// Primitive called in yjit.rb
/// Produce a string representing the disassembly for an ISEQ
#[no_mangle]
@@ -23,11 +51,6 @@ pub extern "C" fn rb_yjit_disasm_iseq(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALU
#[cfg(feature = "disasm")]
{
- // TODO:
- //if unsafe { CLASS_OF(iseqw) != rb_cISeq } {
- // return Qnil;
- //}
-
if !yjit_enabled_p() {
return Qnil;
}
@@ -115,19 +138,21 @@ pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u16, end_idx: u16) -> St
return out;
}
-#[cfg(feature = "disasm")]
+/// Dump disassembly for a range in a [CodeBlock]. VM lock required.
pub fn dump_disasm_addr_range(cb: &CodeBlock, start_addr: CodePtr, end_addr: CodePtr, dump_disasm: &DumpDisasm) {
- use std::fs::File;
- use std::io::Write;
-
for (start_addr, end_addr) in cb.writable_addrs(start_addr, end_addr) {
let disasm = disasm_addr_range(cb, start_addr, end_addr);
if disasm.len() > 0 {
match dump_disasm {
DumpDisasm::Stdout => println!("{disasm}"),
- DumpDisasm::File(path) => {
- let mut f = File::options().create(true).append(true).open(path).unwrap();
- f.write_all(disasm.as_bytes()).unwrap();
+ DumpDisasm::File(fd) => {
+ use std::os::unix::io::{FromRawFd, IntoRawFd};
+ use std::io::Write;
+
+ // Write with the fd opened during boot
+ let mut file = unsafe { std::fs::File::from_raw_fd(*fd) };
+ file.write_all(disasm.as_bytes()).unwrap();
+ let _ = file.into_raw_fd(); // keep the fd open
}
};
}
@@ -165,6 +190,7 @@ pub fn disasm_addr_range(cb: &CodeBlock, start_addr: usize, end_addr: usize) ->
#[cfg(test)]
let start_addr = 0;
let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap();
+ let colors = get_colors();
// For each instruction in this block
for insn in insns.as_ref() {
@@ -172,23 +198,63 @@ pub fn disasm_addr_range(cb: &CodeBlock, start_addr: usize, end_addr: usize) ->
if let Some(comment_list) = cb.comments_at(insn.address() as usize) {
for comment in comment_list {
if cb.outlined {
- write!(&mut out, "\x1b[34m").unwrap(); // Make outlined code blue
+ write!(&mut out, "{}", colors.blue_begin).unwrap(); // Make outlined code blue
}
- writeln!(&mut out, " \x1b[1m# {comment}\x1b[22m").unwrap(); // Make comments bold
+ writeln!(&mut out, " {}# {comment}{}", colors.bold_begin, colors.bold_end).unwrap(); // Make comments bold
}
}
if cb.outlined {
- write!(&mut out, "\x1b[34m").unwrap(); // Make outlined code blue
+ write!(&mut out, "{}", colors.blue_begin).unwrap(); // Make outlined code blue
}
writeln!(&mut out, " {insn}").unwrap();
if cb.outlined {
- write!(&mut out, "\x1b[0m").unwrap(); // Disable blue
+ write!(&mut out, "{}", colors.blue_end).unwrap(); // Disable blue
}
}
return out;
}
+/// Fallback version that prints just bytes and comments, without depending on a disassembler.
+#[cfg(not(feature = "disasm"))]
+pub fn disasm_addr_range(cb: &CodeBlock, start_addr: usize, end_addr: usize) -> String {
+ let mut out = String::new();
+ let mut line_byte_idx = 0;
+ const MAX_BYTES_PER_LINE: usize = 16;
+ let colors = get_colors();
+
+ for addr in start_addr..end_addr {
+ if let Some(comment_list) = cb.comments_at(addr) {
+ // Start a new line if we're in the middle of one
+ if line_byte_idx != 0 {
+ writeln!(&mut out).unwrap();
+ line_byte_idx = 0;
+ }
+ for comment in comment_list {
+ writeln!(&mut out, " {}# {comment}{}", colors.bold_begin, colors.bold_end).unwrap(); // Make comments bold
+ }
+ }
+ if line_byte_idx == 0 {
+ write!(&mut out, " 0x{addr:x}: ").unwrap();
+ } else {
+ write!(&mut out, " ").unwrap();
+ }
+ let byte = unsafe { (addr as *const u8).read() };
+ write!(&mut out, "{byte:02x}").unwrap();
+ line_byte_idx += 1;
+ if line_byte_idx == MAX_BYTES_PER_LINE - 1 {
+ writeln!(&mut out).unwrap();
+ line_byte_idx = 0;
+ }
+ }
+
+ if !out.is_empty() {
+ writeln!(&mut out).unwrap();
+ }
+
+ out
+}
+
/// Assert that CodeBlock has the code specified with hex. In addition, if tested with
/// `cargo test --all-features`, it also checks it generates the specified disasm.
#[cfg(test)]
@@ -261,43 +327,36 @@ pub fn unindent(string: &str, trim_lines: bool) -> String {
/// Produce a list of instructions compiled for an iseq
#[no_mangle]
pub extern "C" fn rb_yjit_insns_compiled(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALUE) -> VALUE {
- {
- // TODO:
- //if unsafe { CLASS_OF(iseqw) != rb_cISeq } {
- // return Qnil;
- //}
-
- if !yjit_enabled_p() {
- return Qnil;
- }
-
- // Get the iseq pointer from the wrapper
- let iseq = unsafe { rb_iseqw_to_iseq(iseqw) };
+ if !yjit_enabled_p() {
+ return Qnil;
+ }
- // Get the list of instructions compiled
- let insn_vec = insns_compiled(iseq);
+ // Get the iseq pointer from the wrapper
+ let iseq = unsafe { rb_iseqw_to_iseq(iseqw) };
- unsafe {
- let insn_ary = rb_ary_new_capa((insn_vec.len() * 2) as i64);
+ // Get the list of instructions compiled
+ let insn_vec = insns_compiled(iseq);
- // For each instruction compiled
- for idx in 0..insn_vec.len() {
- let op_name = &insn_vec[idx].0;
- let insn_idx = insn_vec[idx].1;
+ unsafe {
+ let insn_ary = rb_ary_new_capa((insn_vec.len() * 2) as i64);
- let op_sym = rust_str_to_sym(&op_name);
+ // For each instruction compiled
+ for idx in 0..insn_vec.len() {
+ let op_name = &insn_vec[idx].0;
+ let insn_idx = insn_vec[idx].1;
- // Store the instruction index and opcode symbol
- rb_ary_store(
- insn_ary,
- (2 * idx + 0) as i64,
- VALUE::fixnum_from_usize(insn_idx as usize),
- );
- rb_ary_store(insn_ary, (2 * idx + 1) as i64, op_sym);
- }
+ let op_sym = rust_str_to_sym(&op_name);
- insn_ary
+ // Store the instruction index and opcode symbol
+ rb_ary_store(
+ insn_ary,
+ (2 * idx + 0) as i64,
+ VALUE::fixnum_from_usize(insn_idx as usize),
+ );
+ rb_ary_store(insn_ary, (2 * idx + 1) as i64, op_sym);
}
+
+ insn_ary
}
}
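
dump_disasm_addr_range above writes through a file descriptor that was opened once during boot and stored in DumpDisasm::File. Wrapping a raw fd in std::fs::File would normally close it when the File is dropped, so the code converts the File back into a raw fd after writing. A minimal self-contained sketch of that pattern, using only std; the helper name write_to_raw_fd is illustrative:

use std::io::Write;
use std::os::unix::io::{FromRawFd, IntoRawFd, RawFd};

/// Write `bytes` to an fd that must stay open after the call.
fn write_to_raw_fd(fd: RawFd, bytes: &[u8]) -> std::io::Result<()> {
    // SAFETY: the caller guarantees `fd` is valid and open for writing.
    let mut file = unsafe { std::fs::File::from_raw_fd(fd) };
    let result = file.write_all(bytes);
    // Convert back into a raw fd so dropping `file` does not close it.
    let _ = file.into_raw_fd();
    result
}
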
diff --git a/yjit/src/invariants.rs b/yjit/src/invariants.rs
index 59f7b70e20..0f22fba6b8 100644
--- a/yjit/src/invariants.rs
+++ b/yjit/src/invariants.rs
@@ -1,7 +1,6 @@
//! Code to track assumptions made during code generation and invalidate
//! generated code if and when these assumptions are invalidated.
-use crate::asm::OutlinedCb;
use crate::backend::ir::Assembler;
use crate::codegen::*;
use crate::core::*;
@@ -31,7 +30,6 @@ pub struct Invariants {
/// quick access to all of the blocks that are making this assumption when
/// the operator is redefined.
basic_operator_blocks: HashMap<(RedefinitionFlag, ruby_basic_operators), HashSet<BlockRef>>,
-
/// A map from a block to a set of classes and their associated basic
/// operators that the block is assuming are not redefined. This is used for
/// quick access to all of the assumptions that a block is making when it
@@ -49,10 +47,23 @@ pub struct Invariants {
/// a constant `A::B` is redefined, then all blocks that are assuming that
/// `A` and `B` have not been redefined must be invalidated.
constant_state_blocks: HashMap<ID, HashSet<BlockRef>>,
-
/// A map from a block to a set of IDs that it is assuming have not been
/// redefined.
block_constant_states: HashMap<BlockRef, HashSet<ID>>,
+
+ /// A map from a class to a set of blocks that assume objects of the class
+ /// will have no singleton class. When the set is empty, it means that
+    /// a singleton class has been seen for the class since boot, so you can no
+    /// longer assume the absence of a singleton class.
+    /// For now, the key can only be Array, Hash, or String. Consider making
+ /// an inverted HashMap if we start using this for user-defined classes
+ /// to maintain the performance of block_assumptions_free().
+ no_singleton_classes: HashMap<VALUE, HashSet<BlockRef>>,
+
+    /// A map from an ISEQ to a set of blocks that assume the base pointer is equal
+    /// to the environment pointer. When the set is empty, it means that the EP has
+    /// escaped in the ISEQ.
+ no_ep_escape_iseqs: HashMap<IseqPtr, HashSet<BlockRef>>,
}
/// Private singleton instance of the invariants global struct.
@@ -69,6 +80,8 @@ impl Invariants {
single_ractor: HashSet::new(),
constant_state_blocks: HashMap::new(),
block_constant_states: HashMap::new(),
+ no_singleton_classes: HashMap::new(),
+ no_ep_escape_iseqs: HashMap::new(),
});
}
}
@@ -85,12 +98,11 @@ impl Invariants {
pub fn assume_bop_not_redefined(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
klass: RedefinitionFlag,
bop: ruby_basic_operators,
) -> bool {
if unsafe { BASIC_OP_UNREDEFINED_P(bop, klass) } {
- if jit_ensure_block_entry_exit(jit, asm, ocb).is_none() {
+ if jit_ensure_block_entry_exit(jit, asm).is_none() {
return false;
}
jit.bop_assumptions.push((klass, bop));
@@ -130,6 +142,48 @@ pub fn track_method_lookup_stability_assumption(
.insert(uninit_block);
}
+/// Track that a block will assume that `klass` objects will have no singleton class.
+pub fn track_no_singleton_class_assumption(uninit_block: BlockRef, klass: VALUE) {
+ Invariants::get_instance()
+ .no_singleton_classes
+ .entry(klass)
+ .or_default()
+ .insert(uninit_block);
+}
+
+/// Returns true if we've seen a singleton class of a given class since boot.
+pub fn has_singleton_class_of(klass: VALUE) -> bool {
+ Invariants::get_instance()
+ .no_singleton_classes
+ .get(&klass)
+ .map_or(false, |blocks| blocks.is_empty())
+}
+
+/// Track that a block will assume that the base pointer is equal to the environment pointer.
+pub fn track_no_ep_escape_assumption(uninit_block: BlockRef, iseq: IseqPtr) {
+ Invariants::get_instance()
+ .no_ep_escape_iseqs
+ .entry(iseq)
+ .or_default()
+ .insert(uninit_block);
+}
+
+/// Returns true if the environment of a given ISEQ has previously escaped.
+pub fn iseq_escapes_ep(iseq: IseqPtr) -> bool {
+ Invariants::get_instance()
+ .no_ep_escape_iseqs
+ .get(&iseq)
+ .map_or(false, |blocks| blocks.is_empty())
+}
+
+/// Forget an ISEQ remembered in invariants
+pub fn iseq_free_invariants(iseq: IseqPtr) {
+ if unsafe { INVARIANTS.is_none() } {
+ return;
+ }
+ Invariants::get_instance().no_ep_escape_iseqs.remove(&iseq);
+}
+
// Checks rb_method_basic_definition_p and registers the current block for invalidation if method
// lookup changes.
// A "basic method" is one defined during VM boot, so we can use this to check assumptions based on
@@ -137,13 +191,12 @@ pub fn track_method_lookup_stability_assumption(
pub fn assume_method_basic_definition(
jit: &mut JITState,
asm: &mut Assembler,
- ocb: &mut OutlinedCb,
klass: VALUE,
mid: ID
) -> bool {
if unsafe { rb_method_basic_definition_p(klass, mid) } != 0 {
let cme = unsafe { rb_callable_method_entry(klass, mid) };
- jit.assume_method_lookup_stable(asm, ocb, cme);
+ jit.assume_method_lookup_stable(asm, cme);
true
} else {
false
@@ -152,11 +205,11 @@ pub fn assume_method_basic_definition(
/// Tracks that a block is assuming it is operating in single-ractor mode.
#[must_use]
-pub fn assume_single_ractor_mode(jit: &mut JITState, asm: &mut Assembler, ocb: &mut OutlinedCb) -> bool {
- if unsafe { rb_yjit_multi_ractor_p() } {
+pub fn assume_single_ractor_mode(jit: &mut JITState, asm: &mut Assembler) -> bool {
+ if unsafe { rb_jit_multi_ractor_p() } {
false
} else {
- if jit_ensure_block_entry_exit(jit, asm, ocb).is_none() {
+ if jit_ensure_block_entry_exit(jit, asm).is_none() {
return false;
}
jit.block_assumes_single_ractor = true;
@@ -250,7 +303,7 @@ pub extern "C" fn rb_yjit_cme_invalidate(callee_cme: *const rb_callable_method_e
});
}
-/// Callback for then Ruby is about to spawn a ractor. In that case we need to
+/// Callback for when Ruby is about to spawn a ractor. In that case we need to
/// invalidate every block that is assuming single ractor mode.
#[no_mangle]
pub extern "C" fn rb_yjit_before_ractor_spawn() {
@@ -321,6 +374,23 @@ pub extern "C" fn rb_yjit_root_mark() {
}
}
+#[no_mangle]
+pub extern "C" fn rb_yjit_root_update_references() {
+ if unsafe { INVARIANTS.is_none() } {
+ return;
+ }
+ let no_ep_escape_iseqs = &mut Invariants::get_instance().no_ep_escape_iseqs;
+
+ // Make a copy of the table with updated ISEQ keys
+ let mut updated_copy = HashMap::with_capacity(no_ep_escape_iseqs.len());
+ for (iseq, blocks) in mem::take(no_ep_escape_iseqs) {
+ let new_iseq = unsafe { rb_gc_location(iseq.into()) }.as_iseq();
+ updated_copy.insert(new_iseq, blocks);
+ }
+
+ *no_ep_escape_iseqs = updated_copy;
+}
+
/// Remove all invariant assumptions made by the block by removing the block as
/// as a key in all of the relevant tables.
/// For safety, the block has to be initialized and the vm lock must be held.
@@ -391,6 +461,19 @@ pub fn block_assumptions_free(blockref: BlockRef) {
if invariants.constant_state_blocks.is_empty() {
invariants.constant_state_blocks.shrink_to_fit();
}
+
+ // Remove tracking for blocks assuming no singleton class
+    // NOTE: no_singleton_classes has up to 3 keys (Array, Hash, or String) for now.
+ // This is effectively an O(1) access unless we start using it for more classes.
+ for (_, blocks) in invariants.no_singleton_classes.iter_mut() {
+ blocks.remove(&blockref);
+ }
+
+ // Remove tracking for blocks assuming EP doesn't escape
+ let iseq = unsafe { blockref.as_ref() }.get_blockid().iseq;
+ if let Some(blocks) = invariants.no_ep_escape_iseqs.get_mut(&iseq) {
+ blocks.remove(&blockref);
+ }
}
/// Callback from the opt_setinlinecache instruction in the interpreter.
@@ -412,7 +495,7 @@ pub extern "C" fn rb_yjit_constant_ic_update(iseq: *const rb_iseq_t, ic: IC, ins
return;
};
- if !unsafe { (*(*ic).entry).ic_cref }.is_null() || unsafe { rb_yjit_multi_ractor_p() } {
+ if !unsafe { (*(*ic).entry).ic_cref }.is_null() || unsafe { rb_jit_multi_ractor_p() } {
// We can't generate code in these situations, so no need to invalidate.
// See gen_opt_getinlinecache.
return;
@@ -457,6 +540,66 @@ pub extern "C" fn rb_yjit_constant_ic_update(iseq: *const rb_iseq_t, ic: IC, ins
});
}
+/// Invalidate blocks that assume objects of a given class will have no singleton class.
+#[no_mangle]
+pub extern "C" fn rb_yjit_invalidate_no_singleton_class(klass: VALUE) {
+ // Skip tracking singleton classes during boot. Such objects already have a singleton class
+ // before entering JIT code, so they get rejected when they're checked for the first time.
+ if unsafe { INVARIANTS.is_none() } {
+ return;
+ }
+
+ // We apply this optimization only to Array, Hash, and String for now.
+ if unsafe { [rb_cArray, rb_cHash, rb_cString].contains(&klass) } {
+ with_vm_lock(src_loc!(), || {
+ let no_singleton_classes = &mut Invariants::get_instance().no_singleton_classes;
+ match no_singleton_classes.get_mut(&klass) {
+ Some(blocks) => {
+ // Invalidate existing blocks and let has_singleton_class_of()
+ // return true when they are compiled again
+ for block in mem::take(blocks) {
+ invalidate_block_version(&block);
+ incr_counter!(invalidate_no_singleton_class);
+ }
+ }
+ None => {
+ // Let has_singleton_class_of() return true for this class
+ no_singleton_classes.insert(klass, HashSet::new());
+ }
+ }
+ });
+ }
+}
+
+/// Invalidate blocks for a given ISEQ that assume the environment pointer is
+/// equal to the base pointer.
+#[no_mangle]
+pub extern "C" fn rb_yjit_invalidate_ep_is_bp(iseq: IseqPtr) {
+ // Skip tracking EP escapes on boot. We don't need to invalidate anything during boot.
+ if unsafe { INVARIANTS.is_none() } {
+ return;
+ }
+
+ with_vm_lock(src_loc!(), || {
+ // If an EP escape for this ISEQ is detected for the first time, invalidate all blocks
+ // associated to the ISEQ.
+ let no_ep_escape_iseqs = &mut Invariants::get_instance().no_ep_escape_iseqs;
+ match no_ep_escape_iseqs.get_mut(&iseq) {
+ Some(blocks) => {
+ // Invalidate existing blocks and make jit.ep_is_bp() return false
+ for block in mem::take(blocks) {
+ invalidate_block_version(&block);
+ incr_counter!(invalidate_ep_escape);
+ }
+ }
+ None => {
+ // Let jit.ep_is_bp() return false for this ISEQ
+ no_ep_escape_iseqs.insert(iseq, HashSet::new());
+ }
+ }
+ });
+}
+
// Invalidate all generated code and patch C method return code to contain
// logic for firing the c_return TracePoint event. Once rb_vm_barrier()
// returns, all other ractors are pausing inside RB_VM_LOCK_ENTER(), which
@@ -483,6 +626,8 @@ pub extern "C" fn rb_yjit_tracing_invalidate_all() {
return;
}
+ incr_counter!(invalidate_everything);
+
// Stop other ractors since we are going to patch machine code.
with_vm_lock(src_loc!(), || {
// Make it so all live block versions are no longer valid branch targets
@@ -545,7 +690,7 @@ pub extern "C" fn rb_yjit_tracing_invalidate_all() {
cb.set_write_ptr(patch.inline_patch_pos);
cb.set_dropped_bytes(false);
cb.without_page_end_reserve(|cb| {
- let mut asm = crate::backend::ir::Assembler::new();
+ let mut asm = crate::backend::ir::Assembler::new_without_iseq();
asm.jmp(patch.outlined_target_pos.as_side_exit());
if asm.compile(cb, None).is_none() {
panic!("Failed to apply patch at {:?}", patch.inline_patch_pos);
diff --git a/yjit/src/lib.rs b/yjit/src/lib.rs
index 3f3d24be4b..f3247fbf1a 100644
--- a/yjit/src/lib.rs
+++ b/yjit/src/lib.rs
@@ -3,6 +3,19 @@
#![allow(clippy::too_many_arguments)] // :shrug:
#![allow(clippy::identity_op)] // Sometimes we do it for style
+// TODO(alan): This lint is right -- the way we use `static mut` is UB happy. We have many globals
+// and take `&mut` frequently, sometimes with a method that easily allows calling it twice.
+//
+// All of our globals rely on us running single threaded, which outside of boot-time relies on the
+// VM lock (which signals and waits for all other threads to pause). To fix this properly, we should
+// gather up all the globals into a struct to centralize the safety reasoning. That way we can also
+// check for re-entrance in one place.
+//
+// We're too close to release to do that, though, so disable the lint for now.
+#![allow(unknown_lints)]
+#![allow(static_mut_refs)]
+#![warn(unknown_lints)]
+
pub mod asm;
mod backend;
mod codegen;
@@ -15,3 +28,4 @@ mod stats;
mod utils;
mod yjit;
mod virtualmem;
+mod log;
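
The TODO above proposes gathering the `static mut` globals into one struct so the single-threaded reasoning lives in a single place. One possible shape for that refactor, under the same assumption that boot-time code and the VM lock serialize all access; every name here is illustrative:

/// Hypothetical container for what are currently separate `static mut` singletons.
struct JitGlobals {
    initialized: bool,
    // The Option<...> singletons for stats, invariants, the log, etc. would move here.
}

static mut JIT_GLOBALS: JitGlobals = JitGlobals { initialized: false };

/// Single access point: centralizes the safety argument and gives one place
/// to add a re-entrance check.
fn with_jit_globals<R>(f: impl FnOnce(&mut JitGlobals) -> R) -> R {
    // SAFETY: callers run single threaded (boot) or hold the VM lock, which
    // pauses every other thread before the globals are touched.
    unsafe { f(&mut JIT_GLOBALS) }
}
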
diff --git a/yjit/src/log.rs b/yjit/src/log.rs
new file mode 100644
index 0000000000..c5a724f7e1
--- /dev/null
+++ b/yjit/src/log.rs
@@ -0,0 +1,179 @@
+use crate::core::BlockId;
+use crate::cruby::*;
+use crate::options::*;
+use crate::yjit::yjit_enabled_p;
+
+use std::fmt::{Display, Formatter};
+use std::os::raw::c_long;
+use crate::utils::iseq_get_location;
+
+type Timestamp = f64;
+
+#[derive(Clone, Debug)]
+pub struct LogEntry {
+ /// The time when the block was compiled.
+ pub timestamp: Timestamp,
+
+ /// The log message.
+ pub message: String,
+}
+
+impl Display for LogEntry {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{:15.6}: {}", self.timestamp, self.message)
+ }
+}
+
+pub type Log = CircularBuffer<LogEntry, 1024>;
+static mut LOG: Option<Log> = None;
+
+impl Log {
+ pub fn init() {
+ unsafe {
+ LOG = Some(Log::new());
+ }
+ }
+
+ pub fn get_instance() -> &'static mut Log {
+ unsafe {
+ LOG.as_mut().unwrap()
+ }
+ }
+
+ pub fn has_instance() -> bool {
+ unsafe {
+ LOG.as_mut().is_some()
+ }
+ }
+
+ pub fn add_block_with_chain_depth(block_id: BlockId, chain_depth: u8) {
+ if !Self::has_instance() {
+ return;
+ }
+
+ let print_log = get_option!(log);
+ let timestamp = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_secs_f64();
+
+ let location = iseq_get_location(block_id.iseq, block_id.idx);
+ let index = block_id.idx;
+ let message = if chain_depth > 0 {
+ format!("{} (index: {}, chain_depth: {})", location, index, chain_depth)
+ } else {
+ format!("{} (index: {})", location, index)
+ };
+
+ let entry = LogEntry {
+ timestamp,
+ message
+ };
+
+ if let Some(output) = print_log {
+ match output {
+ LogOutput::Stderr => {
+ eprintln!("{}", entry);
+ }
+
+ LogOutput::File(fd) => {
+ use std::os::unix::io::{FromRawFd, IntoRawFd};
+ use std::io::Write;
+
+ // Write with the fd opened during boot
+ let mut file = unsafe { std::fs::File::from_raw_fd(fd) };
+ writeln!(file, "{}", entry).unwrap();
+ file.flush().unwrap();
+ let _ = file.into_raw_fd(); // keep the fd open
+ }
+
+ LogOutput::MemoryOnly => () // Don't print or write anything
+ }
+ }
+
+ Self::get_instance().push(entry);
+ }
+}
+
+pub struct CircularBuffer<T, const N: usize> {
+ buffer: Vec<Option<T>>,
+ head: usize,
+ tail: usize,
+ size: usize
+}
+
+impl<T: Clone, const N: usize> CircularBuffer<T, N> {
+ pub fn new() -> Self {
+ Self {
+ buffer: vec![None; N],
+ head: 0,
+ tail: 0,
+ size: 0
+ }
+ }
+
+ pub fn push(&mut self, value: T) {
+ self.buffer[self.head] = Some(value);
+ self.head = (self.head + 1) % N;
+ if self.size == N {
+ self.tail = (self.tail + 1) % N;
+ } else {
+ self.size += 1;
+ }
+ }
+
+ pub fn pop(&mut self) -> Option<T> {
+ if self.size == 0 {
+ return None;
+ }
+
+ let value = self.buffer[self.tail].take();
+ self.tail = (self.tail + 1) % N;
+ self.size -= 1;
+ value
+ }
+
+ pub fn len(&self) -> usize {
+ self.size
+ }
+}
+
+
+//===========================================================================
+
+/// Primitive called in yjit.rb
+/// Check if log generation is enabled
+#[no_mangle]
+pub extern "C" fn rb_yjit_log_enabled_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
+ if get_option!(log).is_some() {
+ return Qtrue;
+ } else {
+ return Qfalse;
+ }
+}
+
+/// Primitive called in yjit.rb.
+/// Export all YJIT log entries as a Ruby array.
+#[no_mangle]
+pub extern "C" fn rb_yjit_get_log(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
+ with_vm_lock(src_loc!(), || rb_yjit_get_log_array())
+}
+
+fn rb_yjit_get_log_array() -> VALUE {
+ if !yjit_enabled_p() || get_option!(log).is_none() {
+ return Qnil;
+ }
+
+ let log = Log::get_instance();
+ let array = unsafe { rb_ary_new_capa(log.len() as c_long) };
+
+ while log.len() > 0 {
+ let entry = log.pop().unwrap();
+
+ unsafe {
+ let entry_array = rb_ary_new_capa(2);
+ rb_ary_push(entry_array, rb_float_new(entry.timestamp));
+ rb_ary_push(entry_array, entry.message.into());
+ rb_ary_push(array, entry_array);
+ }
+ }
+
+ return array;
+}
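
CircularBuffer above is a fixed-capacity ring: push() overwrites the oldest entry once size reaches N, and pop() drains entries in FIFO order, which is what lets the log keep only the most recent 1024 entries. A usage sketch with a small capacity; the test module and test names are illustrative:

#[cfg(test)]
mod circular_buffer_usage {
    use super::CircularBuffer;

    #[test]
    fn drops_oldest_when_full() {
        let mut buf: CircularBuffer<u32, 3> = CircularBuffer::new();
        for i in 0..5 {
            buf.push(i); // after the loop the buffer holds 2, 3, 4
        }
        assert_eq!(buf.len(), 3);
        assert_eq!(buf.pop(), Some(2)); // 0 and 1 were overwritten
        assert_eq!(buf.pop(), Some(3));
        assert_eq!(buf.pop(), Some(4));
        assert_eq!(buf.pop(), None);
    }
}
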
diff --git a/yjit/src/options.rs b/yjit/src/options.rs
index 72db513030..c87a436091 100644
--- a/yjit/src/options.rs
+++ b/yjit/src/options.rs
@@ -1,5 +1,5 @@
use std::{ffi::{CStr, CString}, ptr::null, fs::File};
-use crate::backend::current::TEMP_REGS;
+use crate::{backend::current::TEMP_REGS, cruby::*, stats::Counter};
use std::os::raw::{c_char, c_int, c_uint};
// Call threshold for small deployments and command-line apps
@@ -24,12 +24,17 @@ pub static mut rb_yjit_call_threshold: u64 = SMALL_CALL_THRESHOLD;
pub static mut rb_yjit_cold_threshold: u64 = 200_000;
// Command-line options
-#[derive(Clone, PartialEq, Eq, Debug)]
+#[derive(Debug)]
#[repr(C)]
pub struct Options {
- // Size of the executable memory block to allocate in bytes
- // Note that the command line argument is expressed in MiB and not bytes
- pub exec_mem_size: usize,
+    /// Soft limit on all memory used by YJIT, in bytes.
+ /// VirtualMem avoids allocating new pages if code_region_size + yjit_alloc_size
+ /// is larger than this threshold. Rust may still allocate memory beyond this limit.
+ pub mem_size: usize,
+
+    /// Hard limit on the executable memory block to allocate, in bytes.
+ /// Note that the command line argument is expressed in MiB and not bytes
+ pub exec_mem_size: Option<usize>,
// Disable the propagation of type information
pub no_type_prop: bool,
@@ -41,6 +46,9 @@ pub struct Options {
// The number of registers allocated for stack temps
pub num_temp_regs: usize,
+ // Disable Ruby builtin methods defined by `with_jit` hooks, e.g. Array#each in Ruby
+ pub c_builtin: bool,
+
// Capture stats
pub gen_stats: bool,
@@ -48,7 +56,7 @@ pub struct Options {
pub print_stats: bool,
// Trace locations of exits
- pub gen_trace_exits: bool,
+ pub trace_exits: Option<TraceExits>,
// how often to sample exit trace data
pub trace_exits_sample_rate: usize,
@@ -76,17 +84,22 @@ pub struct Options {
pub code_gc: bool,
/// Enable writing /tmp/perf-{pid}.map for Linux perf
- pub perf_map: bool,
+ pub perf_map: Option<PerfMap>,
+
+ // Where to store the log. `None` disables the log.
+ pub log: Option<LogOutput>,
}
// Initialize the options to default values
pub static mut OPTIONS: Options = Options {
- exec_mem_size: 64 * 1024 * 1024,
+ mem_size: 128 * 1024 * 1024,
+ exec_mem_size: None,
no_type_prop: false,
max_versions: 4,
num_temp_regs: 5,
+ c_builtin: false,
gen_stats: false,
- gen_trace_exits: false,
+ trace_exits: None,
print_stats: true,
trace_exits_sample_rate: 0,
disable: false,
@@ -96,28 +109,59 @@ pub static mut OPTIONS: Options = Options {
dump_iseq_disasm: None,
frame_pointer: false,
code_gc: false,
- perf_map: false,
+ perf_map: None,
+ log: None,
};
/// YJIT option descriptions for `ruby --help`.
-static YJIT_OPTIONS: [(&str, &str); 9] = [
- ("--yjit-exec-mem-size=num", "Size of executable memory block in MiB (default: 64)"),
- ("--yjit-call-threshold=num", "Number of calls to trigger JIT"),
- ("--yjit-cold-threshold=num", "Global calls after which ISEQs not compiled (default: 200K)"),
- ("--yjit-stats", "Enable collecting YJIT statistics"),
- ("--yjit-disable", "Disable YJIT for lazily enabling it with RubyVM::YJIT.enable"),
- ("--yjit-code-gc", "Run code GC when the code size reaches the limit"),
- ("--yjit-perf", "Enable frame pointers and perf profiling"),
- ("--yjit-trace-exits", "Record Ruby source location when exiting from generated code"),
- ("--yjit-trace-exits-sample-rate=num", "Trace exit locations only every Nth occurrence"),
+/// Note that --help allows only 80 characters per line, including indentation. 80-character limit --> |
+pub const YJIT_OPTIONS: &'static [(&str, &str)] = &[
+ ("--yjit-mem-size=num", "Soft limit on YJIT memory usage in MiB (default: 128)."),
+ ("--yjit-exec-mem-size=num", "Hard limit on executable memory block in MiB."),
+ ("--yjit-call-threshold=num", "Number of calls to trigger JIT."),
+ ("--yjit-cold-threshold=num", "Global calls after which ISEQs not compiled (default: 200K)."),
+ ("--yjit-stats", "Enable collecting YJIT statistics."),
+ ("--yjit-log[=file|dir]", "Enable logging of YJIT's compilation activity."),
+ ("--yjit-disable", "Disable YJIT for lazily enabling it with RubyVM::YJIT.enable."),
+ ("--yjit-code-gc", "Run code GC when the code size reaches the limit."),
+ ("--yjit-perf", "Enable frame pointers and perf profiling."),
+ ("--yjit-trace-exits", "Record Ruby source location when exiting from generated code."),
+ ("--yjit-trace-exits-sample-rate=num", "Trace exit locations only every Nth occurrence."),
];
-#[derive(Clone, PartialEq, Eq, Debug)]
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub enum TraceExits {
+ // Trace all exits
+ All,
+ // Trace a specific counter
+ Counter(Counter),
+}
+
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub enum LogOutput {
+ // Dump to the log file as events occur.
+ File(std::os::unix::io::RawFd),
+ // Keep the log in memory only
+ MemoryOnly,
+ // Dump to stderr when the process exits
+ Stderr
+}
+
+#[derive(Debug)]
pub enum DumpDisasm {
// Dump to stdout
Stdout,
// Dump to "yjit_{pid}.log" file under the specified directory
- File(String),
+ File(std::os::unix::io::RawFd),
+}
+
+/// Type of symbols to dump into /tmp/perf-{pid}.map
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub enum PerfMap {
+ // Dump ISEQ symbols
+ ISEQ,
+ // Dump YJIT codegen symbols
+ Codegen,
}
/// Macro to get an option value by name
@@ -128,7 +172,7 @@ macro_rules! get_option {
{
// Make this a statement since attributes on expressions are experimental
#[allow(unused_unsafe)]
- let ret = unsafe { OPTIONS.$option_name };
+ let ret = unsafe { crate::options::OPTIONS.$option_name };
ret
}
};
@@ -144,6 +188,7 @@ macro_rules! get_option_ref {
};
}
pub(crate) use get_option_ref;
+use crate::log::Log;
/// Expected to receive what comes after the third dash in "--yjit-*".
/// Empty string means user passed only "--yjit". C code rejects when
@@ -165,6 +210,20 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
match (opt_name, opt_val) {
("", "") => (), // Simply --yjit
+ ("mem-size", _) => match opt_val.parse::<usize>() {
+ Ok(n) => {
+ if n == 0 || n > 2 * 1024 * 1024 {
+ return None
+ }
+
+ // Convert from MiB to bytes internally for convenience
+ unsafe { OPTIONS.mem_size = n * 1024 * 1024 }
+ }
+ Err(_) => {
+ return None;
+ }
+ },
+
("exec-mem-size", _) => match opt_val.parse::<usize>() {
Ok(n) => {
if n == 0 || n > 2 * 1024 * 1024 {
@@ -172,7 +231,7 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
}
// Convert from MiB to bytes internally for convenience
- unsafe { OPTIONS.exec_mem_size = n * 1024 * 1024 }
+ unsafe { OPTIONS.exec_mem_size = Some(n * 1024 * 1024) }
}
Err(_) => {
return None;
@@ -214,6 +273,10 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
}
},
+ ("c-builtin", _) => unsafe {
+ OPTIONS.c_builtin = true;
+ },
+
("code-gc", _) => unsafe {
OPTIONS.code_gc = true;
},
@@ -221,28 +284,41 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
("perf", _) => match opt_val {
"" => unsafe {
OPTIONS.frame_pointer = true;
- OPTIONS.perf_map = true;
+ OPTIONS.perf_map = Some(PerfMap::ISEQ);
},
"fp" => unsafe { OPTIONS.frame_pointer = true },
- "map" => unsafe { OPTIONS.perf_map = true },
+ "iseq" => unsafe { OPTIONS.perf_map = Some(PerfMap::ISEQ) },
+ // Accept --yjit-perf=map for backward compatibility
+ "codegen" | "map" => unsafe { OPTIONS.perf_map = Some(PerfMap::Codegen) },
_ => return None,
},
- ("dump-disasm", _) => match opt_val {
- "" => unsafe { OPTIONS.dump_disasm = Some(DumpDisasm::Stdout) },
- directory => {
- let path = format!("{directory}/yjit_{}.log", std::process::id());
- match File::options().create(true).append(true).open(&path) {
- Ok(_) => {
- eprintln!("YJIT disasm dump: {path}");
- unsafe { OPTIONS.dump_disasm = Some(DumpDisasm::File(path)) }
+ ("dump-disasm", _) => {
+ if !cfg!(feature = "disasm") {
+ eprintln!("WARNING: the {} option works best when YJIT is built in dev mode, i.e. ./configure --enable-yjit=dev", opt_name);
+ }
+
+ match opt_val {
+ "" => unsafe { OPTIONS.dump_disasm = Some(DumpDisasm::Stdout) },
+ directory => {
+ let path = format!("{directory}/yjit_{}.log", std::process::id());
+ match File::options().create(true).append(true).open(&path) {
+ Ok(file) => {
+ use std::os::unix::io::IntoRawFd;
+ eprintln!("YJIT disasm dump: {path}");
+ unsafe { OPTIONS.dump_disasm = Some(DumpDisasm::File(file.into_raw_fd())) }
+ }
+ Err(err) => eprintln!("Failed to create {path}: {err}"),
}
- Err(err) => eprintln!("Failed to create {path}: {err}"),
}
}
- },
+ },
("dump-iseq-disasm", _) => unsafe {
+ if !cfg!(feature = "disasm") {
+ eprintln!("WARNING: the {} option is only available when YJIT is built in dev mode, i.e. ./configure --enable-yjit=dev", opt_name);
+ }
+
OPTIONS.dump_iseq_disasm = Some(opt_val.to_string());
},
@@ -257,8 +333,51 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
return None;
}
},
- ("trace-exits", "") => unsafe { OPTIONS.gen_trace_exits = true; OPTIONS.gen_stats = true; OPTIONS.trace_exits_sample_rate = 0 },
- ("trace-exits-sample-rate", sample_rate) => unsafe { OPTIONS.gen_trace_exits = true; OPTIONS.gen_stats = true; OPTIONS.trace_exits_sample_rate = sample_rate.parse().unwrap(); },
+ ("log", _) => match opt_val {
+ "" => unsafe {
+ OPTIONS.log = Some(LogOutput::Stderr);
+ Log::init();
+ },
+ "quiet" => unsafe {
+ OPTIONS.log = Some(LogOutput::MemoryOnly);
+ Log::init();
+ },
+ arg_value => {
+ let log_file_path = if std::path::Path::new(arg_value).is_dir() {
+ format!("{arg_value}/yjit_{}.log", std::process::id())
+ } else {
+ arg_value.to_string()
+ };
+
+ match File::options().create(true).write(true).truncate(true).open(&log_file_path) {
+ Ok(file) => {
+ use std::os::unix::io::IntoRawFd;
+ eprintln!("YJIT log: {log_file_path}");
+
+ unsafe { OPTIONS.log = Some(LogOutput::File(file.into_raw_fd())) }
+ Log::init()
+ }
+ Err(err) => panic!("Failed to create {log_file_path}: {err}"),
+ }
+ }
+ },
+ ("trace-exits", _) => unsafe {
+ OPTIONS.gen_stats = true;
+ OPTIONS.trace_exits = match opt_val {
+ "" => Some(TraceExits::All),
+ name => match Counter::get(name) {
+ Some(counter) => Some(TraceExits::Counter(counter)),
+ None => return None,
+ },
+ };
+ },
+ ("trace-exits-sample-rate", sample_rate) => unsafe {
+ OPTIONS.gen_stats = true;
+ if OPTIONS.trace_exits.is_none() {
+ OPTIONS.trace_exits = Some(TraceExits::All);
+ }
+ OPTIONS.trace_exits_sample_rate = sample_rate.parse().unwrap();
+ },
("dump-insns", "") => unsafe { OPTIONS.dump_insns = true },
("verify-ctx", "") => unsafe { OPTIONS.verify_ctx = true },
@@ -301,3 +420,13 @@ pub extern "C" fn rb_yjit_show_usage(help: c_int, highlight: c_int, width: c_uin
unsafe { ruby_show_usage_line(name.as_ptr(), null(), description.as_ptr(), help, highlight, width, columns) }
}
}
+
+/// Return true if --yjit-c-builtin is given
+#[no_mangle]
+pub extern "C" fn rb_yjit_c_builtin_p(_ec: EcPtr, _self: VALUE) -> VALUE {
+ if get_option!(c_builtin) {
+ Qtrue
+ } else {
+ Qfalse
+ }
+}
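
[Editor's note] As an aside, a minimal, self-contained sketch (the helper name parse_mem_size_mib is hypothetical, not part of YJIT) of the MiB-to-bytes conversion and range check that the mem-size and exec-mem-size branches above perform:

    // Validate a "--yjit-mem-size"-style value given in MiB and convert it to
    // bytes, mirroring the bounds check above (reject 0 and anything above
    // 2 * 1024 * 1024 MiB).
    fn parse_mem_size_mib(opt_val: &str) -> Option<usize> {
        let mib: usize = opt_val.parse().ok()?;
        if mib == 0 || mib > 2 * 1024 * 1024 {
            return None; // out of range; the caller reports a usage error
        }
        Some(mib * 1024 * 1024) // store bytes internally for convenience
    }

    fn main() {
        assert_eq!(parse_mem_size_mib("128"), Some(128 * 1024 * 1024));
        assert_eq!(parse_mem_size_mib("0"), None);
        assert_eq!(parse_mem_size_mib("not-a-number"), None);
    }
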
diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs
index 924628e13a..105def2fff 100644
--- a/yjit/src/stats.rs
+++ b/yjit/src/stats.rs
@@ -1,111 +1,98 @@
//! Everything related to the collection of runtime stats in YJIT
-//! See the stats feature and the --yjit-stats command-line option
+//! See the --yjit-stats command-line option
-#![allow(dead_code)] // Counters are only used with the stats features
-
-use std::alloc::{GlobalAlloc, Layout, System};
-use std::sync::atomic::{AtomicUsize, Ordering};
+use std::ptr::addr_of_mut;
+use std::sync::atomic::Ordering;
use std::time::Instant;
use std::collections::HashMap;
use crate::codegen::CodegenGlobals;
-use crate::core::Context;
-use crate::core::for_each_iseq_payload;
use crate::cruby::*;
use crate::options::*;
-use crate::yjit::yjit_enabled_p;
+use crate::yjit::{yjit_enabled_p, YJIT_INIT_TIME};
+
+#[cfg(feature = "stats_allocator")]
+#[path = "../../jit/src/lib.rs"]
+mod jit;
-/// A running total of how many ISeqs are in the system.
+/// Running total of how many ISeqs are in the system.
#[no_mangle]
pub static mut rb_yjit_live_iseq_count: u64 = 0;
-/// A middleware to count Rust-allocated bytes as yjit_alloc_size.
-#[global_allocator]
-static GLOBAL_ALLOCATOR: StatsAlloc = StatsAlloc { alloc_size: AtomicUsize::new(0) };
+/// Monotonically increasing total of how many ISEQs were allocated
+#[no_mangle]
+pub static mut rb_yjit_iseq_alloc_count: u64 = 0;
-pub struct StatsAlloc {
- alloc_size: AtomicUsize,
+/// The number of bytes YJIT has allocated on the Rust heap.
+pub fn yjit_alloc_size() -> usize {
+ jit::GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst)
}
-unsafe impl GlobalAlloc for StatsAlloc {
- unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
- self.alloc_size.fetch_add(layout.size(), Ordering::SeqCst);
- System.alloc(layout)
- }
-
- unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
- self.alloc_size.fetch_sub(layout.size(), Ordering::SeqCst);
- System.dealloc(ptr, layout)
- }
-
- unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
- self.alloc_size.fetch_add(layout.size(), Ordering::SeqCst);
- System.alloc_zeroed(layout)
- }
-
- unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
- if new_size > layout.size() {
- self.alloc_size.fetch_add(new_size - layout.size(), Ordering::SeqCst);
- } else if new_size < layout.size() {
- self.alloc_size.fetch_sub(layout.size() - new_size, Ordering::SeqCst);
- }
- System.realloc(ptr, layout, new_size)
- }
-}
-
-/// Mapping of C function name to integer indices
+/// Mapping of C function / ISEQ name to integer indices
/// This is accessed at compilation time only (protected by a lock)
static mut CFUNC_NAME_TO_IDX: Option<HashMap<String, usize>> = None;
+static mut ISEQ_NAME_TO_IDX: Option<HashMap<String, usize>> = None;
-/// Vector of call counts for each C function index
+/// Vector of call counts for each C function / ISEQ index
/// This is modified (but not resized) by JITted code
static mut CFUNC_CALL_COUNT: Option<Vec<u64>> = None;
+static mut ISEQ_CALL_COUNT: Option<Vec<u64>> = None;
/// Assign an index to a given cfunc name string
-pub fn get_cfunc_idx(name: &str) -> usize
-{
- //println!("{}", name);
-
- unsafe {
- if CFUNC_NAME_TO_IDX.is_none() {
- CFUNC_NAME_TO_IDX = Some(HashMap::default());
- }
+pub fn get_cfunc_idx(name: &str) -> usize {
+ // SAFETY: We acquire a VM lock and don't create multiple &mut references to these static mut variables.
+ unsafe { get_method_idx(name, &mut *addr_of_mut!(CFUNC_NAME_TO_IDX), &mut *addr_of_mut!(CFUNC_CALL_COUNT)) }
+}
- if CFUNC_CALL_COUNT.is_none() {
- CFUNC_CALL_COUNT = Some(Vec::default());
- }
+/// Assign an index to a given ISEQ name string
+pub fn get_iseq_idx(name: &str) -> usize {
+ // SAFETY: We acquire a VM lock and don't create multiple &mut references to these static mut variables.
+ unsafe { get_method_idx(name, &mut *addr_of_mut!(ISEQ_NAME_TO_IDX), &mut *addr_of_mut!(ISEQ_CALL_COUNT)) }
+}
- let name_to_idx = CFUNC_NAME_TO_IDX.as_mut().unwrap();
+fn get_method_idx(
+ name: &str,
+ method_name_to_idx: &mut Option<HashMap<String, usize>>,
+ method_call_count: &mut Option<Vec<u64>>,
+) -> usize {
+ //println!("{}", name);
- match name_to_idx.get(name) {
- Some(idx) => *idx,
- None => {
- let idx = name_to_idx.len();
- name_to_idx.insert(name.to_string(), idx);
+ let name_to_idx = method_name_to_idx.get_or_insert_with(HashMap::default);
+ let call_count = method_call_count.get_or_insert_with(Vec::default);
- // Resize the call count vector
- let cfunc_call_count = CFUNC_CALL_COUNT.as_mut().unwrap();
- if idx >= cfunc_call_count.len() {
- cfunc_call_count.resize(idx + 1, 0);
- }
+ match name_to_idx.get(name) {
+ Some(idx) => *idx,
+ None => {
+ let idx = name_to_idx.len();
+ name_to_idx.insert(name.to_string(), idx);
- idx
+ // Resize the call count vector
+ if idx >= call_count.len() {
+ call_count.resize(idx + 1, 0);
}
+
+ idx
}
}
}
// Increment the counter for a C function
-pub extern "C" fn incr_cfunc_counter(idx: usize)
-{
- unsafe {
- let cfunc_call_count = CFUNC_CALL_COUNT.as_mut().unwrap();
- assert!(idx < cfunc_call_count.len());
- cfunc_call_count[idx] += 1;
- }
+pub extern "C" fn incr_cfunc_counter(idx: usize) {
+ let cfunc_call_count = unsafe { CFUNC_CALL_COUNT.as_mut().unwrap() };
+ assert!(idx < cfunc_call_count.len());
+ cfunc_call_count[idx] += 1;
+}
+
+// Increment the counter for an ISEQ
+pub extern "C" fn incr_iseq_counter(idx: usize) {
+ let iseq_call_count = unsafe { ISEQ_CALL_COUNT.as_mut().unwrap() };
+ assert!(idx < iseq_call_count.len());
+ iseq_call_count[idx] += 1;
}
-// YJIT exit counts for each instruction type
+/// YJIT exit counts for each instruction type.
+/// Note that `VM_INSTRUCTION_SIZE` is an upper bound and the actual number
+/// of VM opcodes may be different in the build. See [`rb_vm_instruction_size()`]
const VM_INSTRUCTION_SIZE_USIZE: usize = VM_INSTRUCTION_SIZE as usize;
static mut EXIT_OP_COUNT: [u64; VM_INSTRUCTION_SIZE_USIZE] = [0; VM_INSTRUCTION_SIZE_USIZE];
@@ -128,7 +115,7 @@ impl YjitExitLocations {
/// Initialize the yjit exit locations
pub fn init() {
// Return if --yjit-trace-exits isn't enabled
- if !get_option!(gen_trace_exits) {
+ if get_option!(trace_exits).is_none() {
return;
}
@@ -177,7 +164,7 @@ impl YjitExitLocations {
}
// Return if --yjit-trace-exits isn't enabled
- if !get_option!(gen_trace_exits) {
+ if get_option!(trace_exits).is_none() {
return;
}
@@ -219,6 +206,14 @@ macro_rules! make_counters {
pub enum Counter { $($counter_name),+ }
impl Counter {
+ /// Map a counter name string to a counter enum
+ pub fn get(name: &str) -> Option<Counter> {
+ match name {
+ $( stringify!($counter_name) => { Some(Counter::$counter_name) } ),+
+ _ => None,
+ }
+ }
+
/// Get a counter name string
pub fn get_name(&self) -> String {
match self {
@@ -245,15 +240,31 @@ macro_rules! make_counters {
/// The list of counters that are available without --yjit-stats.
/// They are incremented only by `incr_counter!` and don't use `gen_counter_incr`.
-pub const DEFAULT_COUNTERS: [Counter; 8] = [
+pub const DEFAULT_COUNTERS: &'static [Counter] = &[
Counter::code_gc_count,
Counter::compiled_iseq_entry,
Counter::cold_iseq_entry,
Counter::compiled_iseq_count,
Counter::compiled_blockid_count,
Counter::compiled_block_count,
+ Counter::deleted_defer_block_count,
Counter::compiled_branch_count,
Counter::compile_time_ns,
+ Counter::compilation_failure,
+ Counter::max_inline_versions,
+ Counter::inline_block_count,
+ Counter::num_contexts_encoded,
+ Counter::context_cache_hits,
+
+ Counter::invalidation_count,
+ Counter::invalidate_method_lookup,
+ Counter::invalidate_bop_redefined,
+ Counter::invalidate_ractor_spawn,
+ Counter::invalidate_constant_state_bump,
+ Counter::invalidate_constant_ic_fill,
+ Counter::invalidate_no_singleton_class,
+ Counter::invalidate_ep_escape,
+ Counter::invalidate_everything,
];
/// Macro to increase a counter by name and count
@@ -269,6 +280,24 @@ macro_rules! incr_counter_by {
}
pub(crate) use incr_counter_by;
+/// Macro to increase a counter if the given value is larger
+macro_rules! incr_counter_to {
+ // Unsafe is ok here because options are initialized
+ // once before any Ruby code executes
+ ($counter_name:ident, $count:expr) => {
+ #[allow(unused_unsafe)]
+ {
+ unsafe {
+ $crate::stats::COUNTERS.$counter_name = u64::max(
+ $crate::stats::COUNTERS.$counter_name,
+ $count as u64,
+ )
+ }
+ }
+ };
+}
+pub(crate) use incr_counter_to;
+
/// Macro to increment a counter by name
macro_rules! incr_counter {
// Unsafe is ok here because options are initialized
@@ -291,23 +320,17 @@ macro_rules! ptr_to_counter {
}
};
}
-pub(crate) use ptr_to_counter;
// Declare all the counters we track
make_counters! {
yjit_insns_count,
// Method calls that fallback to dynamic dispatch
- send_keywords,
- send_kw_splat,
- send_args_splat_super,
- send_iseq_zsuper,
- send_block_arg,
+ send_singleton_class,
+ send_forwarding,
send_ivar_set_method,
send_zsuper_method,
send_undef_method,
- send_optimized_method,
- send_optimized_method_call,
send_optimized_method_block_call,
send_call_block,
send_call_kwarg,
@@ -317,67 +340,63 @@ make_counters! {
send_missing_method,
send_refined_method,
send_private_not_fcall,
- send_cfunc_ruby_array_varg,
+ send_cfunc_kw_splat_non_nil,
+ send_cfunc_splat_neg2,
send_cfunc_argc_mismatch,
send_cfunc_block_arg,
send_cfunc_toomany_args,
send_cfunc_tracing,
- send_cfunc_kwargs,
send_cfunc_splat_with_kw,
- send_cfunc_splat_send,
+ send_cfunc_splat_varg_ruby2_keywords,
send_attrset_kwargs,
send_attrset_block_arg,
send_iseq_tailcall,
send_iseq_arity_error,
send_iseq_block_arg_type,
send_iseq_clobbering_block_arg,
+ send_iseq_block_arg_gc_unsafe,
+ send_iseq_complex_discard_extras,
send_iseq_leaf_builtin_block_arg_block_param,
- send_iseq_only_keywords,
- send_iseq_kwargs_req_and_opt_missing,
+ send_iseq_kw_splat_non_nil,
send_iseq_kwargs_mismatch,
send_iseq_has_post,
- send_iseq_has_kwrest,
send_iseq_has_no_kw,
send_iseq_accepts_no_kwarg,
send_iseq_materialized_block,
- send_iseq_splat_with_opt,
+ send_iseq_send_forwarding,
+ send_iseq_splat_not_array,
send_iseq_splat_with_kw,
send_iseq_missing_optional_kw,
send_iseq_too_many_kwargs,
send_not_implemented_method,
send_getter_arity,
send_getter_block_arg,
- send_args_splat_non_iseq,
- send_args_splat_ivar,
send_args_splat_attrset,
send_args_splat_bmethod,
send_args_splat_aref,
send_args_splat_aset,
send_args_splat_opt_call,
- send_args_splat_cfunc_var_args,
- send_args_splat_cfunc_zuper,
- send_args_splat_cfunc_ruby2_keywords,
send_iseq_splat_arity_error,
send_splat_too_long,
- send_iseq_ruby2_keywords,
- send_send_not_imm,
send_send_wrong_args,
send_send_null_mid,
send_send_null_cme,
send_send_nested,
- send_send_chain_string,
- send_send_chain_not_string_or_sym,
- send_send_getter,
- send_send_builtin,
+ send_send_attr_reader,
+ send_send_attr_writer,
send_iseq_has_rest_and_captured,
- send_iseq_has_rest_and_splat,
+ send_iseq_has_kwrest_and_captured,
send_iseq_has_rest_and_kw_supplied,
send_iseq_has_rest_opt_and_block,
send_bmethod_ractor,
send_bmethod_block_arg,
send_optimized_block_arg,
+ send_pred_not_fixnum,
+ send_pred_underflow,
+ send_str_dup_exivar,
invokesuper_defined_class_mismatch,
+ invokesuper_forwarding,
invokesuper_kw_splat,
invokesuper_kwarg,
invokesuper_megamorphic,
@@ -385,14 +404,15 @@ make_counters! {
invokesuper_no_me,
invokesuper_not_iseq_or_cfunc,
invokesuper_refinement,
+ invokesuper_singleton_class,
invokeblock_megamorphic,
invokeblock_none,
invokeblock_iseq_arg0_optional,
- invokeblock_iseq_arg0_has_kw,
invokeblock_iseq_arg0_args_splat,
invokeblock_iseq_arg0_not_array,
invokeblock_iseq_arg0_wrong_len,
+ invokeblock_iseq_not_inlined,
invokeblock_ifunc_args_splat,
invokeblock_ifunc_kw_splat,
invokeblock_proc,
@@ -400,22 +420,27 @@ make_counters! {
// Method calls that exit to the interpreter
guard_send_block_arg_type,
+ guard_send_getter_splat_non_empty,
guard_send_klass_megamorphic,
guard_send_se_cf_overflow,
guard_send_se_protected_check_failed,
guard_send_splatarray_length_not_equal,
- guard_send_splatarray_last_ruby_2_keywords,
+ guard_send_splatarray_last_ruby2_keywords,
guard_send_splat_not_array,
- guard_send_send_chain,
- guard_send_send_chain_not_string,
- guard_send_send_chain_not_sym,
+ guard_send_send_name_chain,
guard_send_iseq_has_rest_and_splat_too_few,
guard_send_is_a_class_mismatch,
guard_send_instance_of_class_mismatch,
guard_send_interrupted,
guard_send_not_fixnums,
+ guard_send_not_fixnum,
+ guard_send_not_fixnum_or_flonum,
guard_send_not_string,
guard_send_respond_to_mid_mismatch,
+ guard_send_str_aref_not_fixnum,
+
+ guard_send_cfunc_bad_splat_vargs,
+ guard_send_cfunc_block_not_nil,
guard_invokesuper_me_changed,
@@ -427,15 +452,9 @@ make_counters! {
leave_se_interrupt,
leave_interp_return,
- getivar_se_self_not_heap,
- getivar_idx_out_of_range,
getivar_megamorphic,
getivar_not_heap,
- setivar_se_self_not_heap,
- setivar_idx_out_of_range,
- setivar_val_heapobject,
- setivar_name_not_mapped,
setivar_not_heap,
setivar_frozen,
setivar_megamorphic,
@@ -445,6 +464,8 @@ make_counters! {
setlocal_wb_required,
+ invokebuiltin_too_many_args,
+
opt_plus_overflow,
opt_minus_overflow,
opt_mult_overflow,
@@ -458,6 +479,8 @@ make_counters! {
lshift_amount_changed,
lshift_overflow,
+ rshift_amount_changed,
+
opt_aref_argc_not_one,
opt_aref_arg_not_fixnum,
opt_aref_not_array,
@@ -466,8 +489,7 @@ make_counters! {
opt_aset_not_array,
opt_aset_not_fixnum,
opt_aset_not_hash,
-
- opt_aref_with_qundef,
+ opt_aset_frozen,
opt_case_dispatch_megamorphic,
@@ -478,6 +500,7 @@ make_counters! {
expandarray_postarg,
expandarray_not_array,
expandarray_to_ary,
+ expandarray_method_missing,
expandarray_chain_max_depth,
// getblockparam
@@ -497,6 +520,13 @@ make_counters! {
objtostring_not_string,
+ getbyte_idx_not_fixnum,
+ getbyte_idx_negative,
+ getbyte_idx_out_of_bounds,
+
+ splatkw_not_hash,
+ splatkw_not_nil,
+
binding_allocations,
binding_set,
@@ -508,11 +538,16 @@ make_counters! {
compiled_branch_count,
compile_time_ns,
compilation_failure,
+ abandoned_block_count,
block_next_count,
defer_count,
defer_empty_count,
+ deleted_defer_block_count,
branch_insn_count,
branch_known_count,
+ max_inline_versions,
+ inline_block_count,
+ num_contexts_encoded,
freed_iseq_count,
@@ -524,8 +559,9 @@ make_counters! {
invalidate_ractor_spawn,
invalidate_constant_state_bump,
invalidate_constant_ic_fill,
-
- constant_state_bumps,
+ invalidate_no_singleton_class,
+ invalidate_ep_escape,
+ invalidate_everything,
// Currently, it's out of the ordinary (might be impossible) for YJIT to leave gaps in
// executable memory, so this should be 0.
@@ -541,25 +577,34 @@ make_counters! {
num_send_x86_rel32,
num_send_x86_reg,
num_send_dynamic,
- num_send_inline,
- num_send_leaf_builtin,
num_send_cfunc,
num_send_cfunc_inline,
+ num_send_iseq,
+ num_send_iseq_leaf,
+ num_send_iseq_inline,
num_getivar_megamorphic,
num_setivar_megamorphic,
+ num_opt_case_dispatch_megamorphic,
num_throw,
num_throw_break,
num_throw_retry,
num_throw_return,
+ num_lazy_frame_check,
+ num_lazy_frame_push,
+ lazy_frame_count,
+ lazy_frame_failure,
+
iseq_stack_too_large,
iseq_too_long,
temp_reg_opnd,
temp_mem_opnd,
temp_spill,
+
+ context_cache_hits,
}
//===========================================================================
@@ -590,17 +635,16 @@ pub extern "C" fn rb_yjit_print_stats_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE
/// Primitive called in yjit.rb.
/// Export all YJIT statistics as a Ruby hash.
#[no_mangle]
-pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE, context: VALUE) -> VALUE {
- with_vm_lock(src_loc!(), || rb_yjit_gen_stats_dict(context == Qtrue))
+pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE, key: VALUE) -> VALUE {
+ with_vm_lock(src_loc!(), || rb_yjit_gen_stats_dict(key))
}
/// Primitive called in yjit.rb
///
-/// Check if trace_exits generation is enabled. Requires the stats feature
-/// to be enabled.
+/// Check if trace_exits generation is enabled.
#[no_mangle]
pub extern "C" fn rb_yjit_trace_exit_locations_enabled_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
- if get_option!(gen_trace_exits) {
+ if get_option!(trace_exits).is_some() {
return Qtrue;
}
@@ -617,11 +661,11 @@ pub extern "C" fn rb_yjit_get_exit_locations(_ec: EcPtr, _ruby_self: VALUE) -> V
}
// Return if --yjit-trace-exits isn't enabled
- if !get_option!(gen_trace_exits) {
+ if get_option!(trace_exits).is_none() {
return Qnil;
}
- // If the stats feature is enabled, pass yjit_raw_samples and yjit_line_samples
+ // Pass yjit_raw_samples and yjit_line_samples
// to the C function called rb_yjit_exit_locations_dict for parsing.
let yjit_raw_samples = YjitExitLocations::get_raw_samples();
let yjit_line_samples = YjitExitLocations::get_line_samples();
@@ -650,21 +694,40 @@ pub extern "C" fn rb_yjit_incr_counter(counter_name: *const std::os::raw::c_char
}
/// Export all YJIT statistics as a Ruby hash.
-fn rb_yjit_gen_stats_dict(context: bool) -> VALUE {
+fn rb_yjit_gen_stats_dict(key: VALUE) -> VALUE {
// If YJIT is not enabled, return Qnil
if !yjit_enabled_p() {
return Qnil;
}
- macro_rules! hash_aset_usize {
- ($hash:ident, $counter_name:expr, $value:expr) => {
- let key = rust_str_to_sym($counter_name);
- let value = VALUE::fixnum_from_usize($value);
- rb_hash_aset($hash, key, value);
+ let hash = if key == Qnil {
+ unsafe { rb_hash_new() }
+ } else {
+ Qnil
+ };
+
+ macro_rules! set_stat {
+ ($hash:ident, $name:expr, $value:expr) => {
+ let rb_key = rust_str_to_sym($name);
+ if key == rb_key {
+ return $value;
+ } else if hash != Qnil {
+ rb_hash_aset($hash, rb_key, $value);
+ }
}
}
- let hash = unsafe { rb_hash_new() };
+ macro_rules! set_stat_usize {
+ ($hash:ident, $name:expr, $value:expr) => {
+ set_stat!($hash, $name, VALUE::fixnum_from_usize($value));
+ }
+ }
+
+ macro_rules! set_stat_double {
+ ($hash:ident, $name:expr, $value:expr) => {
+ set_stat!($hash, $name, rb_float_new($value));
+ }
+ }
unsafe {
// Get the inline and outlined code blocks
@@ -672,40 +735,45 @@ fn rb_yjit_gen_stats_dict(context: bool) -> VALUE {
let ocb = CodegenGlobals::get_outlined_cb();
// Inline code size
- hash_aset_usize!(hash, "inline_code_size", cb.code_size());
+ set_stat_usize!(hash, "inline_code_size", cb.code_size());
// Outlined code size
- hash_aset_usize!(hash, "outlined_code_size", ocb.unwrap().code_size());
+ set_stat_usize!(hash, "outlined_code_size", ocb.unwrap().code_size());
// GCed pages
let freed_page_count = cb.num_freed_pages();
- hash_aset_usize!(hash, "freed_page_count", freed_page_count);
+ set_stat_usize!(hash, "freed_page_count", freed_page_count);
// GCed code size
- hash_aset_usize!(hash, "freed_code_size", freed_page_count * cb.page_size());
+ set_stat_usize!(hash, "freed_code_size", freed_page_count * cb.page_size());
// Live pages
- hash_aset_usize!(hash, "live_page_count", cb.num_mapped_pages() - freed_page_count);
+ set_stat_usize!(hash, "live_page_count", cb.num_mapped_pages() - freed_page_count);
// Size of memory region allocated for JIT code
- hash_aset_usize!(hash, "code_region_size", cb.mapped_region_size());
+ set_stat_usize!(hash, "code_region_size", cb.mapped_region_size());
// Rust global allocations in bytes
- hash_aset_usize!(hash, "yjit_alloc_size", GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst));
-
- // `context` is true at RubyVM::YJIT._print_stats for --yjit-stats. It's false by default
- // for RubyVM::YJIT.runtime_stats because counting all Contexts could be expensive.
- if context {
- let live_context_count = get_live_context_count();
- let context_size = std::mem::size_of::<Context>();
- hash_aset_usize!(hash, "live_context_count", live_context_count);
- hash_aset_usize!(hash, "live_context_size", live_context_count * context_size);
- }
+ set_stat_usize!(hash, "yjit_alloc_size", yjit_alloc_size());
+
+ // How many bytes we are using to store context data
+ let context_data = CodegenGlobals::get_context_data();
+ set_stat_usize!(hash, "context_data_bytes", context_data.num_bytes());
+ set_stat_usize!(hash, "context_cache_bytes", crate::core::CTX_ENCODE_CACHE_BYTES + crate::core::CTX_DECODE_CACHE_BYTES);
// VM instructions count
- hash_aset_usize!(hash, "vm_insns_count", rb_vm_insns_count as usize);
+ if rb_vm_insn_count > 0 {
+ set_stat_usize!(hash, "vm_insns_count", rb_vm_insn_count as usize);
+ }
+
+ set_stat_usize!(hash, "live_iseq_count", rb_yjit_live_iseq_count as usize);
+ set_stat_usize!(hash, "iseq_alloc_count", rb_yjit_iseq_alloc_count as usize);
- hash_aset_usize!(hash, "live_iseq_count", rb_yjit_live_iseq_count as usize);
+ set_stat!(hash, "object_shape_count", rb_object_shape_count());
+
+ // Time since YJIT init in nanoseconds
+ let time_nanos = Instant::now().duration_since(YJIT_INIT_TIME.unwrap()).as_nanos();
+ set_stat_usize!(hash, "yjit_active_ns", time_nanos as usize);
}
// If we're not generating stats, put only default counters
@@ -716,9 +784,9 @@ fn rb_yjit_gen_stats_dict(context: bool) -> VALUE {
let counter_val = unsafe { *counter_ptr };
// Put counter into hash
- let key = rust_str_to_sym(&counter.get_name());
+ let key = &counter.get_name();
let value = VALUE::fixnum_from_usize(counter_val as usize);
- unsafe { rb_hash_aset(hash, key, value); }
+ unsafe { set_stat!(hash, key, value); }
}
return hash;
@@ -726,71 +794,111 @@ fn rb_yjit_gen_stats_dict(context: bool) -> VALUE {
unsafe {
// Indicate that the complete set of stats is available
- rb_hash_aset(hash, rust_str_to_sym("all_stats"), Qtrue);
+ set_stat!(hash, "all_stats", Qtrue);
// For each counter we track
for counter_name in COUNTER_NAMES {
// Get the counter value
let counter_ptr = get_counter_ptr(counter_name);
let counter_val = *counter_ptr;
-
- // Put counter into hash
- let key = rust_str_to_sym(counter_name);
- let value = VALUE::fixnum_from_usize(counter_val as usize);
- rb_hash_aset(hash, key, value);
+ set_stat_usize!(hash, counter_name, counter_val as usize);
}
+ let mut side_exits = 0;
+
// For each entry in exit_op_count, add a stats entry with key "exit_INSTRUCTION_NAME"
// and the value is the count of side exits for that instruction.
- for op_idx in 0..VM_INSTRUCTION_SIZE_USIZE {
+ use crate::utils::IntoUsize;
+ for op_idx in 0..rb_vm_instruction_size().as_usize() {
let op_name = insn_name(op_idx);
let key_string = "exit_".to_owned() + &op_name;
- let key = rust_str_to_sym(&key_string);
- let value = VALUE::fixnum_from_usize(EXIT_OP_COUNT[op_idx] as usize);
- rb_hash_aset(hash, key, value);
+ let count = EXIT_OP_COUNT[op_idx];
+ side_exits += count;
+ set_stat_usize!(hash, &key_string, count as usize);
}
- // Create a hash for the cfunc call counts
- let calls_hash = rb_hash_new();
- rb_hash_aset(hash, rust_str_to_sym("cfunc_calls"), calls_hash);
- if let Some(cfunc_name_to_idx) = CFUNC_NAME_TO_IDX.as_mut() {
- let call_counts = CFUNC_CALL_COUNT.as_mut().unwrap();
-
- for (name, idx) in cfunc_name_to_idx {
- let count = call_counts[*idx];
- let key = rust_str_to_sym(name);
- let value = VALUE::fixnum_from_usize(count as usize);
- rb_hash_aset(calls_hash, key, value);
- }
+ set_stat_usize!(hash, "side_exit_count", side_exits as usize);
+
+ let total_exits = side_exits + *get_counter_ptr(&Counter::leave_interp_return.get_name());
+ set_stat_usize!(hash, "total_exit_count", total_exits as usize);
+
+ // Number of instructions that finish executing in YJIT.
+ // See :count-placement: about the subtraction.
+ let retired_in_yjit = *get_counter_ptr(&Counter::yjit_insns_count.get_name()) - side_exits;
+
+ // Average length of instruction sequences executed by YJIT
+ let avg_len_in_yjit: f64 = if total_exits > 0 {
+ retired_in_yjit as f64 / total_exits as f64
+ } else {
+ 0_f64
+ };
+ set_stat_double!(hash, "avg_len_in_yjit", avg_len_in_yjit);
+
+ // Proportion of instructions that retire in YJIT
+ if rb_vm_insn_count > 0 {
+ let total_insns_count = retired_in_yjit + rb_vm_insn_count;
+ set_stat_usize!(hash, "total_insns_count", total_insns_count as usize);
+
+ let ratio_in_yjit: f64 = 100.0 * retired_in_yjit as f64 / total_insns_count as f64;
+ set_stat_double!(hash, "ratio_in_yjit", ratio_in_yjit);
}
- }
- hash
-}
+ // Set method call counts in a Ruby dict
+ fn set_call_counts(
+ calls_hash: VALUE,
+ method_name_to_idx: &mut Option<HashMap<String, usize>>,
+ method_call_count: &mut Option<Vec<u64>>,
+ ) {
+ if let (Some(name_to_idx), Some(call_counts)) = (method_name_to_idx, method_call_count) {
+ // Create a list of (name, call_count) pairs
+ let mut pairs = Vec::new();
+ for (name, idx) in name_to_idx {
+ let count = call_counts[*idx];
+ pairs.push((name, count));
+ }
+
+ // Sort the vectors by decreasing call counts
+ pairs.sort_by_key(|e| -(e.1 as i64));
-fn get_live_context_count() -> usize {
- let mut count = 0;
- for_each_iseq_payload(|iseq_payload| {
- for blocks in iseq_payload.version_map.iter() {
- for block in blocks.iter() {
- count += unsafe { block.as_ref() }.get_ctx_count();
+ // Cap the number of counts reported to avoid
+ // bloating log files, etc.
+ pairs.truncate(20);
+
+ // Add the pairs to the dict
+ for (name, call_count) in pairs {
+ let key = rust_str_to_sym(name);
+ let value = VALUE::fixnum_from_usize(call_count as usize);
+ unsafe { rb_hash_aset(calls_hash, key, value); }
+ }
}
}
- for block in iseq_payload.dead_blocks.iter() {
- count += unsafe { block.as_ref() }.get_ctx_count();
- }
- });
- count
+
+ // Create a hash for the cfunc call counts
+ set_stat!(hash, "cfunc_calls", {
+ let cfunc_calls = rb_hash_new();
+ set_call_counts(cfunc_calls, &mut *addr_of_mut!(CFUNC_NAME_TO_IDX), &mut *addr_of_mut!(CFUNC_CALL_COUNT));
+ cfunc_calls
+ });
+
+ // Create a hash for the ISEQ call counts
+ set_stat!(hash, "iseq_calls", {
+ let iseq_calls = rb_hash_new();
+ set_call_counts(iseq_calls, &mut *addr_of_mut!(ISEQ_NAME_TO_IDX), &mut *addr_of_mut!(ISEQ_CALL_COUNT));
+ iseq_calls
+ });
+ }
+
+ hash
}
/// Record the backtrace when a YJIT exit occurs. This functionality requires
-/// that the stats feature is enabled as well as the --yjit-trace-exits option.
+/// the --yjit-trace-exits option.
///
/// This function will fill two Vec's in YjitExitLocations to record the raw samples
/// and line samples. Their length should be the same; however, the data stored in
/// them is different.
#[no_mangle]
-pub extern "C" fn rb_yjit_record_exit_stack(_exit_pc: *const VALUE)
+pub extern "C" fn rb_yjit_record_exit_stack(exit_pc: *const VALUE)
{
// Return if YJIT is not enabled
if !yjit_enabled_p() {
@@ -798,7 +906,7 @@ pub extern "C" fn rb_yjit_record_exit_stack(_exit_pc: *const VALUE)
}
// Return if --yjit-trace-exits isn't enabled
- if !get_option!(gen_trace_exits) {
+ if get_option!(trace_exits).is_none() {
return;
}
@@ -814,10 +922,11 @@ pub extern "C" fn rb_yjit_record_exit_stack(_exit_pc: *const VALUE)
// rb_vm_insn_addr2opcode won't work in cargo test --all-features
// because it's a C function. Without insn call, this function is useless
// so wrap the whole thing in a not test check.
+ let _ = exit_pc;
#[cfg(not(test))]
{
// Get the opcode from the encoded insn handler at this PC
- let insn = unsafe { rb_vm_insn_addr2opcode((*_exit_pc).as_ptr()) };
+ let insn = unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) };
// Use the same buffer size as Stackprof.
const BUFF_LEN: usize = 2048;
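
[Editor's note] As an aside, a simplified sketch of the name-interning scheme that get_cfunc_idx/get_iseq_idx and get_method_idx implement above; the CallCounters type is hypothetical and drops the statics and VM-lock handling of the real code:

    use std::collections::HashMap;

    // Each name gets a stable index into a call-count vector that JITted code
    // can bump cheaply by index.
    struct CallCounters {
        name_to_idx: HashMap<String, usize>,
        call_count: Vec<u64>,
    }

    impl CallCounters {
        fn new() -> Self {
            CallCounters { name_to_idx: HashMap::new(), call_count: Vec::new() }
        }

        fn get_idx(&mut self, name: &str) -> usize {
            if let Some(&idx) = self.name_to_idx.get(name) {
                return idx;
            }
            let idx = self.name_to_idx.len();
            self.name_to_idx.insert(name.to_string(), idx);
            self.call_count.resize(idx + 1, 0); // grow the counter vector in lockstep
            idx
        }

        fn incr(&mut self, idx: usize) {
            self.call_count[idx] += 1;
        }
    }

    fn main() {
        let mut counters = CallCounters::new();
        let idx = counters.get_idx("Integer#+");
        counters.incr(idx);
        assert_eq!(counters.get_idx("Integer#+"), idx);
        assert_eq!(counters.call_count[idx], 1);
    }
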
diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs
index 6bc66ee33e..251628fabf 100644
--- a/yjit/src/utils.rs
+++ b/yjit/src/utils.rs
@@ -3,6 +3,7 @@
use crate::backend::ir::*;
use crate::cruby::*;
use std::slice;
+use std::os::raw::c_int;
/// Trait for casting to [usize] that allows you to say `.as_usize()`.
/// Implementation conditional on the cast preserving the numeric value on
@@ -51,7 +52,7 @@ impl IntoUsize for u8 {
}
}
-/// The [Into<u64>] Rust does not provide.
+/// The `Into<u64>` Rust does not provide.
/// Convert to u64 with assurance that the value is preserved.
/// Currently, `usize::BITS == 64` holds for all platforms we support.
pub(crate) trait IntoU64 {
@@ -91,10 +92,7 @@ pub fn ruby_str_to_rust(v: VALUE) -> String {
let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8;
let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap();
let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) };
- match String::from_utf8(str_slice.to_vec()) {
- Ok(utf8) => utf8,
- Err(_) => String::new(),
- }
+ String::from_utf8(str_slice.to_vec()).unwrap_or_default()
}
// Location is the file defining the method, colon, method name.
@@ -162,8 +160,6 @@ pub fn print_int(asm: &mut Assembler, opnd: Opnd) {
}
}
- asm.cpush_all();
-
let argument = match opnd {
Opnd::Mem(_) | Opnd::Reg(_) | Opnd::InsnOut { .. } => {
// Sign-extend the value if necessary
@@ -178,7 +174,6 @@ pub fn print_int(asm: &mut Assembler, opnd: Opnd) {
};
asm.ccall(print_int_fn as *const u8, vec![argument]);
- asm.cpop_all();
}
/// Generate code to print a pointer
@@ -191,9 +186,7 @@ pub fn print_ptr(asm: &mut Assembler, opnd: Opnd) {
assert!(opnd.rm_num_bits() == 64);
- asm.cpush_all();
asm.ccall(print_ptr_fn as *const u8, vec![opnd]);
- asm.cpop_all();
}
/// Generate code to print a value
@@ -206,9 +199,7 @@ pub fn print_value(asm: &mut Assembler, opnd: Opnd) {
assert!(matches!(opnd, Opnd::Value(_)));
- asm.cpush_all();
asm.ccall(print_value_fn as *const u8, vec![opnd]);
- asm.cpop_all();
}
/// Generate code to print constant string to stdout
@@ -223,7 +214,6 @@ pub fn print_str(asm: &mut Assembler, str: &str) {
}
}
- asm.cpush_all();
let string_data = asm.new_label("string_data");
let after_string = asm.new_label("after_string");
@@ -235,8 +225,14 @@ pub fn print_str(asm: &mut Assembler, str: &str) {
let opnd = asm.lea_jump_target(string_data);
asm.ccall(print_str_cfun as *const u8, vec![opnd, Opnd::UImm(str.len() as u64)]);
+}
- asm.cpop_all();
+pub fn stdout_supports_colors() -> bool {
+ // TODO(max): Use std::io::IsTerminal after upgrading Rust to 1.70
+ extern "C" { fn isatty(fd: c_int) -> c_int; }
+ let stdout = 1;
+ let is_terminal = unsafe { isatty(stdout) } == 1;
+ is_terminal
}
#[cfg(test)]
@@ -273,7 +269,7 @@ mod tests {
#[test]
fn test_print_int() {
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new_without_iseq();
let mut cb = CodeBlock::new_dummy(1024);
print_int(&mut asm, Opnd::Imm(42));
@@ -282,7 +278,7 @@ mod tests {
#[test]
fn test_print_str() {
- let mut asm = Assembler::new();
+ let mut asm = Assembler::new_without_iseq();
let mut cb = CodeBlock::new_dummy(1024);
print_str(&mut asm, "Hello, world!");
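
[Editor's note] As an aside, the TODO in stdout_supports_colors points at std::io::IsTerminal; a minimal sketch of that variant, assuming the minimum supported Rust version is raised to 1.70 or later:

    use std::io::IsTerminal;

    // Replaces the raw isatty(2) call with the standard library's IsTerminal trait.
    fn stdout_supports_colors() -> bool {
        std::io::stdout().is_terminal()
    }

    fn main() {
        println!("colors: {}", stdout_supports_colors());
    }
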
diff --git a/yjit/src/virtualmem.rs b/yjit/src/virtualmem.rs
index f3c0ceefff..9126cf300e 100644
--- a/yjit/src/virtualmem.rs
+++ b/yjit/src/virtualmem.rs
@@ -3,9 +3,12 @@
// usize->pointer casts is viable. It seems like a lot of work for us to participate for not much
// benefit.
-use std::ptr::NonNull;
+use std::{cell::RefCell, ptr::NonNull};
-use crate::{utils::IntoUsize, backend::ir::Target};
+use crate::{backend::ir::Target, stats::yjit_alloc_size, utils::IntoUsize};
+
+#[cfg(test)]
+use crate::options::get_option;
#[cfg(not(test))]
pub type VirtualMem = VirtualMemory<sys::SystemAllocator>;
@@ -26,15 +29,24 @@ pub struct VirtualMemory<A: Allocator> {
/// Location of the virtual memory region.
region_start: NonNull<u8>,
- /// Size of the region in bytes.
+ /// Size of this virtual memory region in bytes.
region_size_bytes: usize,
+ /// mapped_region_bytes + yjit_alloc_size may not increase beyond this limit.
+ memory_limit_bytes: usize,
+
/// Number of bytes per "page", memory protection permission can only be controlled at this
/// granularity.
page_size_bytes: usize,
+ /// Mutable parts.
+ mutable: RefCell<VirtualMemoryMut<A>>,
+}
+
+/// Mutable parts of [`VirtualMemory`].
+pub struct VirtualMemoryMut<A: Allocator> {
    /// Number of bytes that we have allocated physical memory for starting at
- /// [Self::region_start].
+ /// [VirtualMemory::region_start].
mapped_region_bytes: usize,
/// Keep track of the address of the last written to page.
@@ -106,17 +118,26 @@ use WriteError::*;
impl<A: Allocator> VirtualMemory<A> {
/// Bring a part of the address space under management.
- pub fn new(allocator: A, page_size: u32, virt_region_start: NonNull<u8>, size_bytes: usize) -> Self {
+ pub fn new(
+ allocator: A,
+ page_size: u32,
+ virt_region_start: NonNull<u8>,
+ region_size_bytes: usize,
+ memory_limit_bytes: usize,
+ ) -> Self {
assert_ne!(0, page_size);
let page_size_bytes = page_size.as_usize();
Self {
region_start: virt_region_start,
- region_size_bytes: size_bytes,
+ region_size_bytes,
+ memory_limit_bytes,
page_size_bytes,
- mapped_region_bytes: 0,
- current_write_page: None,
- allocator,
+ mutable: RefCell::new(VirtualMemoryMut {
+ mapped_region_bytes: 0,
+ current_write_page: None,
+ allocator,
+ }),
}
}
@@ -127,7 +148,7 @@ impl<A: Allocator> VirtualMemory<A> {
}
pub fn mapped_end_ptr(&self) -> CodePtr {
- self.start_ptr().add_bytes(self.mapped_region_bytes)
+ self.start_ptr().add_bytes(self.mutable.borrow().mapped_region_bytes)
}
pub fn virtual_end_ptr(&self) -> CodePtr {
@@ -136,7 +157,7 @@ impl<A: Allocator> VirtualMemory<A> {
/// Size of the region in bytes that we have allocated physical memory for.
pub fn mapped_region_size(&self) -> usize {
- self.mapped_region_bytes
+ self.mutable.borrow().mapped_region_bytes
}
/// Size of the region in bytes where writes could be attempted.
@@ -151,19 +172,21 @@ impl<A: Allocator> VirtualMemory<A> {
}
/// Write a single byte. The first write to a page makes it readable.
- pub fn write_byte(&mut self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> {
+ pub fn write_byte(&self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> {
+ let mut mutable = self.mutable.borrow_mut();
+
let page_size = self.page_size_bytes;
let raw: *mut u8 = write_ptr.raw_ptr(self) as *mut u8;
let page_addr = (raw as usize / page_size) * page_size;
- if self.current_write_page == Some(page_addr) {
+ if mutable.current_write_page == Some(page_addr) {
// Writing within the last written to page, nothing to do
} else {
// Switching to a different and potentially new page
let start = self.region_start.as_ptr();
- let mapped_region_end = start.wrapping_add(self.mapped_region_bytes);
+ let mapped_region_end = start.wrapping_add(mutable.mapped_region_bytes);
let whole_region_end = start.wrapping_add(self.region_size_bytes);
- let alloc = &mut self.allocator;
+ let alloc = &mut mutable.allocator;
assert!((start..=whole_region_end).contains(&mapped_region_end));
@@ -175,8 +198,9 @@ impl<A: Allocator> VirtualMemory<A> {
return Err(FailedPageMapping);
}
- self.current_write_page = Some(page_addr);
- } else if (start..whole_region_end).contains(&raw) {
+ mutable.current_write_page = Some(page_addr);
+ } else if (start..whole_region_end).contains(&raw) &&
+ (page_addr + page_size - start as usize) + yjit_alloc_size() < self.memory_limit_bytes {
// Writing to a brand new page
let mapped_region_end_addr = mapped_region_end as usize;
let alloc_size = page_addr - mapped_region_end_addr + page_size;
@@ -206,9 +230,9 @@ impl<A: Allocator> VirtualMemory<A> {
unreachable!("unknown arch");
}
}
- self.mapped_region_bytes = self.mapped_region_bytes + alloc_size;
+ mutable.mapped_region_bytes = mutable.mapped_region_bytes + alloc_size;
- self.current_write_page = Some(page_addr);
+ mutable.current_write_page = Some(page_addr);
} else {
return Err(OutOfBounds);
}
@@ -220,20 +244,41 @@ impl<A: Allocator> VirtualMemory<A> {
Ok(())
}
+ /// Make all the code in the region writeable.
+ /// Call this during GC before the phase of updating reference fields.
+ pub fn mark_all_writeable(&self) {
+ let mut mutable = self.mutable.borrow_mut();
+
+ mutable.current_write_page = None;
+
+ let region_start = self.region_start;
+ let mapped_region_bytes: u32 = mutable.mapped_region_bytes.try_into().unwrap();
+
+        // Make the mapped region writable
+ if !mutable.allocator.mark_writable(region_start.as_ptr(), mapped_region_bytes) {
+ panic!("Cannot make memory region writable: {:?}-{:?}",
+ region_start.as_ptr(),
+ unsafe { region_start.as_ptr().add(mapped_region_bytes as usize)}
+ );
+ }
+ }
+
/// Make all the code in the region executable. Call this at the end of a write session.
/// See [Self] for usual usage flow.
- pub fn mark_all_executable(&mut self) {
- self.current_write_page = None;
+ pub fn mark_all_executable(&self) {
+ let mut mutable = self.mutable.borrow_mut();
+
+ mutable.current_write_page = None;
let region_start = self.region_start;
- let mapped_region_bytes: u32 = self.mapped_region_bytes.try_into().unwrap();
+ let mapped_region_bytes: u32 = mutable.mapped_region_bytes.try_into().unwrap();
// Make mapped region executable
- self.allocator.mark_executable(region_start.as_ptr(), mapped_region_bytes);
+ mutable.allocator.mark_executable(region_start.as_ptr(), mapped_region_bytes);
}
/// Free a range of bytes. start_ptr must be memory page-aligned.
- pub fn free_bytes(&mut self, start_ptr: CodePtr, size: u32) {
+ pub fn free_bytes(&self, start_ptr: CodePtr, size: u32) {
assert_eq!(start_ptr.raw_ptr(self) as usize % self.page_size_bytes, 0);
// Bounds check the request. We should only free memory we manage.
@@ -246,7 +291,8 @@ impl<A: Allocator> VirtualMemory<A> {
// code page, it's more appropriate to check the last byte against the virtual region.
assert!(virtual_region.contains(&last_byte_to_free));
- self.allocator.mark_unused(start_ptr.raw_ptr(self), size);
+ let mut mutable = self.mutable.borrow_mut();
+ mutable.allocator.mark_unused(start_ptr.raw_ptr(self), size);
}
}
@@ -273,15 +319,15 @@ mod sys {
impl super::Allocator for SystemAllocator {
fn mark_writable(&mut self, ptr: *const u8, size: u32) -> bool {
- unsafe { rb_yjit_mark_writable(ptr as VoidPtr, size) }
+ unsafe { rb_jit_mark_writable(ptr as VoidPtr, size) }
}
fn mark_executable(&mut self, ptr: *const u8, size: u32) {
- unsafe { rb_yjit_mark_executable(ptr as VoidPtr, size) }
+ unsafe { rb_jit_mark_executable(ptr as VoidPtr, size) }
}
fn mark_unused(&mut self, ptr: *const u8, size: u32) -> bool {
- unsafe { rb_yjit_mark_unused(ptr as VoidPtr, size) }
+ unsafe { rb_jit_mark_unused(ptr as VoidPtr, size) }
}
}
}
@@ -368,17 +414,18 @@ pub mod tests {
PAGE_SIZE.try_into().unwrap(),
NonNull::new(mem_start as *mut u8).unwrap(),
mem_size,
+ get_option!(mem_size),
)
}
#[test]
#[cfg(target_arch = "x86_64")]
fn new_memory_is_initialized() {
- let mut virt = new_dummy_virt_mem();
+ let virt = new_dummy_virt_mem();
virt.write_byte(virt.start_ptr(), 1).unwrap();
assert!(
- virt.allocator.memory[..PAGE_SIZE].iter().all(|&byte| byte != 0),
+ virt.mutable.borrow().allocator.memory[..PAGE_SIZE].iter().all(|&byte| byte != 0),
"Entire page should be initialized",
);
@@ -386,21 +433,21 @@ pub mod tests {
let three_pages = 3 * PAGE_SIZE;
virt.write_byte(virt.start_ptr().add_bytes(three_pages), 1).unwrap();
assert!(
- virt.allocator.memory[..three_pages].iter().all(|&byte| byte != 0),
+ virt.mutable.borrow().allocator.memory[..three_pages].iter().all(|&byte| byte != 0),
"Gaps between write requests should be filled",
);
}
#[test]
fn no_redundant_syscalls_when_writing_to_the_same_page() {
- let mut virt = new_dummy_virt_mem();
+ let virt = new_dummy_virt_mem();
virt.write_byte(virt.start_ptr(), 1).unwrap();
virt.write_byte(virt.start_ptr(), 0).unwrap();
assert!(
matches!(
- virt.allocator.requests[..],
+ virt.mutable.borrow().allocator.requests[..],
[MarkWritable { start_idx: 0, length: PAGE_SIZE }],
)
);
@@ -409,7 +456,7 @@ pub mod tests {
#[test]
fn bounds_checking() {
use super::WriteError::*;
- let mut virt = new_dummy_virt_mem();
+ let virt = new_dummy_virt_mem();
let one_past_end = virt.start_ptr().add_bytes(virt.virtual_region_size());
assert_eq!(Err(OutOfBounds), virt.write_byte(one_past_end, 0));
@@ -422,7 +469,7 @@ pub mod tests {
fn only_written_to_regions_become_executable() {
// ... so we catch attempts to read/write/execute never-written-to regions
const THREE_PAGES: usize = PAGE_SIZE * 3;
- let mut virt = new_dummy_virt_mem();
+ let virt = new_dummy_virt_mem();
let page_two_start = virt.start_ptr().add_bytes(PAGE_SIZE * 2);
virt.write_byte(page_two_start, 1).unwrap();
virt.mark_all_executable();
@@ -430,7 +477,7 @@ pub mod tests {
assert!(virt.virtual_region_size() > THREE_PAGES);
assert!(
matches!(
- virt.allocator.requests[..],
+ virt.mutable.borrow().allocator.requests[..],
[
MarkWritable { start_idx: 0, length: THREE_PAGES },
MarkExecutable { start_idx: 0, length: THREE_PAGES },
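
[Editor's note] As an aside, a minimal sketch of the interior-mutability split that the VirtualMemory change above applies (the Region/RegionMut names are hypothetical): fixed configuration stays in the outer struct while state mutated during writes moves behind a RefCell, so methods like write_byte can take &self:

    use std::cell::RefCell;

    struct Region {
        size_bytes: usize,           // fixed at construction
        mutable: RefCell<RegionMut>, // mutated through &self
    }

    struct RegionMut {
        mapped_bytes: usize,
    }

    impl Region {
        fn new(size_bytes: usize) -> Self {
            Region { size_bytes, mutable: RefCell::new(RegionMut { mapped_bytes: 0 }) }
        }

        // Takes &self, like write_byte()/mark_all_executable() after the change.
        fn map_more(&self, bytes: usize) -> bool {
            let mut m = self.mutable.borrow_mut();
            if m.mapped_bytes + bytes > self.size_bytes {
                return false; // would exceed the region, analogous to OutOfBounds
            }
            m.mapped_bytes += bytes;
            true
        }
    }

    fn main() {
        let region = Region::new(4096);
        assert!(region.map_more(4096));
        assert!(!region.map_more(1));
    }
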
diff --git a/yjit/src/yjit.rs b/yjit/src/yjit.rs
index 50335a7987..517a0daae5 100644
--- a/yjit/src/yjit.rs
+++ b/yjit/src/yjit.rs
@@ -7,7 +7,9 @@ use crate::stats::YjitExitLocations;
use crate::stats::incr_counter;
use crate::stats::with_compile_time;
-use std::os::raw;
+use std::os::raw::{c_char, c_int};
+use std::time::Instant;
+use crate::log::Log;
/// Is YJIT on? The interpreter uses this variable to decide whether to trigger
/// compilation. See jit_exec() and jit_compile().
@@ -15,13 +17,21 @@ use std::os::raw;
#[no_mangle]
pub static mut rb_yjit_enabled_p: bool = false;
+// Time when YJIT was initialized (see yjit_init)
+pub static mut YJIT_INIT_TIME: Option<Instant> = None;
+
/// Parse one command-line option.
/// This is called from ruby.c
#[no_mangle]
-pub extern "C" fn rb_yjit_parse_option(str_ptr: *const raw::c_char) -> bool {
+pub extern "C" fn rb_yjit_parse_option(str_ptr: *const c_char) -> bool {
return parse_option(str_ptr).is_some();
}
+#[no_mangle]
+pub extern "C" fn rb_yjit_option_disable() -> bool {
+ return get_option!(disable);
+}
+
/// Like rb_yjit_enabled_p, but for Rust code.
pub fn yjit_enabled_p() -> bool {
unsafe { rb_yjit_enabled_p }
@@ -34,7 +44,7 @@ pub extern "C" fn rb_yjit_init(yjit_enabled: bool) {
yjit_reg_method_codegen_fns();
// If --yjit-disable, yjit_init() will not be called until RubyVM::YJIT.enable.
- if yjit_enabled && !get_option!(disable) {
+ if yjit_enabled {
yjit_init();
}
}
@@ -44,6 +54,12 @@ fn yjit_init() {
// TODO: need to make sure that command-line options have been
// initialized by CRuby
+ // Call YJIT hooks before enabling YJIT to avoid compiling the hooks themselves
+ unsafe {
+ let yjit = rb_const_get(rb_cRubyVM, rust_str_to_id("YJIT"));
+ rb_funcall(yjit, rust_str_to_id("call_jit_hooks"), 0);
+ }
+
// Catch panics to avoid UB for unwinding into C frames.
// See https://doc.rust-lang.org/nomicon/exception-safety.html
let result = std::panic::catch_unwind(|| {
@@ -65,17 +81,21 @@ fn yjit_init() {
}
// Make sure --yjit-perf doesn't append symbols to an old file
- if get_option!(perf_map) {
+ if get_option!(perf_map).is_some() {
let perf_map = format!("/tmp/perf-{}.map", std::process::id());
let _ = std::fs::remove_file(&perf_map);
println!("YJIT perf map: {perf_map}");
}
- // Initialize the GC hooks. Do this at last as some code depend on Rust initialization.
- extern "C" {
- fn rb_yjit_init_gc_hooks();
+ // Note the time when YJIT was initialized
+ unsafe {
+ YJIT_INIT_TIME = Some(Instant::now());
}
- unsafe { rb_yjit_init_gc_hooks() }
+}
+
+#[no_mangle]
+pub extern "C" fn rb_yjit_free_at_exit() {
+ yjit_shutdown_free_codegen_table();
}
/// At the moment, we abort in all cases we panic.
@@ -102,14 +122,17 @@ fn rb_bug_panic_hook() {
env::set_var("RUST_BACKTRACE", "1");
previous_hook(panic_info);
- unsafe { rb_bug(b"YJIT panicked\0".as_ref().as_ptr() as *const raw::c_char); }
+ // Abort with rb_bug(). It has a length limit on the message.
+ let panic_message = &format!("{}", panic_info)[..];
+ let len = std::cmp::min(0x100, panic_message.len()) as c_int;
+ unsafe { rb_bug(b"YJIT: %*s\0".as_ref().as_ptr() as *const c_char, len, panic_message.as_ptr()); }
}));
}
/// Called from C code to begin compiling a function
/// NOTE: this should be wrapped in RB_VM_LOCK_ENTER(), rb_vm_barrier() on the C side
/// If jit_exception is true, compile JIT code for handling exceptions.
-/// See [jit_compile_exception] for details.
+/// See jit_compile_exception() for details.
#[no_mangle]
pub extern "C" fn rb_yjit_iseq_gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> *const u8 {
// Don't compile when there is insufficient native stack space
@@ -168,8 +191,24 @@ pub extern "C" fn rb_yjit_code_gc(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
/// Enable YJIT compilation, returning true if YJIT was previously disabled
#[no_mangle]
-pub extern "C" fn rb_yjit_enable(_ec: EcPtr, _ruby_self: VALUE, gen_stats: VALUE, print_stats: VALUE) -> VALUE {
+pub extern "C" fn rb_yjit_enable(_ec: EcPtr, _ruby_self: VALUE, gen_stats: VALUE, print_stats: VALUE, gen_log: VALUE, print_log: VALUE, mem_size: VALUE, call_threshold: VALUE) -> VALUE {
with_vm_lock(src_loc!(), || {
+
+ if !mem_size.nil_p() {
+ let mem_size_mb = mem_size.as_isize() >> 1;
+ let mem_size_bytes = mem_size_mb * 1024 * 1024;
+ unsafe {
+ OPTIONS.mem_size = mem_size_bytes as usize;
+ }
+ }
+
+ if !call_threshold.nil_p() {
+ let threshold = call_threshold.as_isize() >> 1;
+ unsafe {
+ rb_yjit_call_threshold = threshold as u64;
+ }
+ }
+
// Initialize and enable YJIT
if gen_stats.test() {
unsafe {
@@ -177,6 +216,19 @@ pub extern "C" fn rb_yjit_enable(_ec: EcPtr, _ruby_self: VALUE, gen_stats: VALUE
OPTIONS.print_stats = print_stats.test();
}
}
+
+ if gen_log.test() {
+ unsafe {
+ if print_log.test() {
+ OPTIONS.log = Some(LogOutput::Stderr);
+ } else {
+ OPTIONS.log = Some(LogOutput::MemoryOnly);
+ }
+
+ Log::init();
+ }
+ }
+
yjit_init();
// Add "+YJIT" to RUBY_DESCRIPTION
@@ -207,3 +259,19 @@ pub extern "C" fn rb_yjit_simulate_oom_bang(_ec: EcPtr, _ruby_self: VALUE) -> VA
return Qnil;
}
+
+/// Push a C method frame if the given PC is supposed to lazily push one.
+/// This is called from rb_raise() (at rb_exc_new_str()) and other functions
+/// that may make a method call (e.g. rb_to_int()).
+#[no_mangle]
+pub extern "C" fn rb_yjit_lazy_push_frame(pc: *mut VALUE) {
+ if !yjit_enabled_p() {
+ return;
+ }
+
+ incr_counter!(num_lazy_frame_check);
+ if let Some(&(cme, recv_idx)) = CodegenGlobals::get_pc_to_cfunc().get(&pc) {
+ incr_counter!(num_lazy_frame_push);
+ unsafe { rb_vm_push_cfunc_frame(cme, recv_idx as i32) }
+ }
+}
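
[Editor's note] As an aside, a simplified sketch of the panic-hook chaining that rb_bug_panic_hook uses above; it prints instead of calling rb_bug and caps the message to 256 characters rather than bytes:

    fn install_panic_hook() {
        let previous_hook = std::panic::take_hook();
        std::panic::set_hook(Box::new(move |panic_info| {
            // Run the previous hook first so the usual panic output still appears.
            previous_hook(panic_info);
            // Cap the message before aborting, as the real hook does for rb_bug().
            let message: String = format!("{}", panic_info).chars().take(0x100).collect();
            eprintln!("fatal: {message}");
        }));
    }

    fn main() {
        install_panic_hook();
        // A real caller would let the hook fire when a panic occurs.
    }
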
diff --git a/yjit/yjit.mk b/yjit/yjit.mk
index 1a8a3b8869..21fd96514b 100644
--- a/yjit/yjit.mk
+++ b/yjit/yjit.mk
@@ -1,95 +1,46 @@
# -*- mode: makefile-gmake; indent-tabs-mode: t -*-
-# Show Cargo progress when doing `make V=1`
-CARGO_VERBOSE_0 = -q
-CARGO_VERBOSE_1 =
-CARGO_VERBOSE = $(CARGO_VERBOSE_$(V))
-
YJIT_SRC_FILES = $(wildcard \
$(top_srcdir)/yjit/Cargo.* \
$(top_srcdir)/yjit/src/*.rs \
$(top_srcdir)/yjit/src/*/*.rs \
$(top_srcdir)/yjit/src/*/*/*.rs \
$(top_srcdir)/yjit/src/*/*/*/*.rs \
+ $(top_srcdir)/jit/src/lib.rs \
)
-# Because of Cargo cache, if the actual binary is not changed from the
-# previous build, the mtime is preserved as the cached file.
-# This means the target is not updated actually, and it will need to
-# rebuild at the next build.
-YJIT_LIB_TOUCH = touch $@
+# Absolute path to match RUST_LIB rules to avoid picking
+# the "target" dir in the source directory through VPATH.
+BUILD_YJIT_LIBS = $(TOP_BUILD_DIR)/$(YJIT_LIBS)
-# YJIT_SUPPORT=yes when `configure` gets `--enable-yjit`
-ifeq ($(YJIT_SUPPORT),yes)
-$(YJIT_LIBS): $(YJIT_SRC_FILES)
+# In a YJIT-only build (no ZJIT)
+ifneq ($(strip $(YJIT_LIBS)),)
+yjit-libs: $(BUILD_YJIT_LIBS)
+$(BUILD_YJIT_LIBS): $(YJIT_SRC_FILES)
$(ECHO) 'building Rust YJIT (release mode)'
- $(Q) $(RUSTC) $(YJIT_RUSTC_ARGS)
- $(YJIT_LIB_TOUCH)
-else ifeq ($(YJIT_SUPPORT),no)
-$(YJIT_LIBS):
- $(ECHO) 'Error: Tried to build YJIT without configuring it first. Check `make showconfig`?'
- @false
-else ifeq ($(YJIT_SUPPORT),$(filter dev dev_nodebug stats,$(YJIT_SUPPORT)))
-$(YJIT_LIBS): $(YJIT_SRC_FILES)
- $(ECHO) 'building Rust YJIT ($(YJIT_SUPPORT) mode)'
- $(Q)$(CHDIR) $(top_srcdir)/yjit && \
- CARGO_TARGET_DIR='$(CARGO_TARGET_DIR)' \
- CARGO_TERM_PROGRESS_WHEN='never' \
- $(CARGO) $(CARGO_VERBOSE) build $(CARGO_BUILD_ARGS)
- $(YJIT_LIB_TOUCH)
-else
-endif
+ $(gnumake_recursive)$(Q) $(RUSTC) $(YJIT_RUSTC_ARGS)
+else ifneq ($(strip $(RLIB_DIR)),) # combo build
+# Absolute path to avoid VPATH ambiguity
+YJIT_RLIB = $(TOP_BUILD_DIR)/$(RLIB_DIR)/libyjit.rlib
-yjit-libobj: $(YJIT_LIBOBJ)
-
-YJIT_LIB_SYMBOLS = $(YJIT_LIBS:.a=).symbols
-$(YJIT_LIBOBJ): $(YJIT_LIBS)
- $(ECHO) 'partial linking $(YJIT_LIBS) into $@'
-ifneq ($(findstring darwin,$(target_os)),)
- $(Q) $(CC) -nodefaultlibs -r -o $@ -exported_symbols_list $(YJIT_LIB_SYMBOLS) $(YJIT_LIBS)
-else
- $(Q) $(LD) -r -o $@ --whole-archive $(YJIT_LIBS)
- -$(Q) $(OBJCOPY) --wildcard --keep-global-symbol='$(SYMBOL_PREFIX)rb_*' $(@)
-endif
+$(YJIT_RLIB): $(YJIT_SRC_FILES)
+ $(ECHO) 'building $(@F)'
+ $(gnumake_recursive)$(Q) $(RUSTC) '-L$(@D)' --extern=jit $(YJIT_RUSTC_ARGS)
-# For Darwin only: a list of symbols that we want the glommed Rust static lib to export.
-# Unfortunately, using wildcard like '_rb_*' with -exported-symbol does not work, at least
-# not on version 820.1. Assume llvm-nm, so XCode 8.0 (from 2016) or newer.
-#
-# The -exported_symbols_list pulls out the right archive members. Symbols not listed
-# in the list are made private extern, which are in turn made local as we're using `ld -r`.
-# Note, section about -keep_private_externs in ld's man page hints at this behavior on which
-# we rely.
-ifneq ($(findstring darwin,$(target_os)),)
-$(YJIT_LIB_SYMBOLS): $(YJIT_LIBS)
- $(Q) $(tooldir)/darwin-ar $(NM) --defined-only --extern-only $(YJIT_LIBS) | \
- sed -n -e 's/.* //' -e '/^$(SYMBOL_PREFIX)rb_/p' \
- -e '/^$(SYMBOL_PREFIX)rust_eh_personality/p' \
- > $@
+$(RUST_LIB): $(YJIT_RLIB)
+endif # ifneq ($(strip $(YJIT_LIBS)),)
-$(YJIT_LIBOBJ): $(YJIT_LIB_SYMBOLS)
+ifneq ($(YJIT_SUPPORT),no)
+$(RUST_LIB): $(YJIT_SRC_FILES)
endif
-# By using YJIT_BENCH_OPTS instead of RUN_OPTS, you can skip passing the options to `make install`
-YJIT_BENCH_OPTS = $(RUN_OPTS) --enable-gems
-YJIT_BENCH = benchmarks/railsbench/benchmark.rb
-
-# Run yjit-bench's ./run_once.sh for CI
-yjit-bench: install update-yjit-bench PHONY
- $(Q) cd $(srcdir)/yjit-bench && PATH=$(prefix)/bin:$$PATH \
- ./run_once.sh $(YJIT_BENCH_OPTS) $(YJIT_BENCH)
-
-update-yjit-bench:
- $(Q) $(tooldir)/git-refresh -C $(srcdir) --branch main \
- https://github.com/Shopify/yjit-bench yjit-bench $(GIT_OPTS)
-
RUST_VERSION = +1.58.0
# Gives quick feedback about YJIT. Not a replacement for a full test run.
-.PHONY: yjit-smoke-test
-yjit-smoke-test:
+.PHONY: yjit-check
+yjit-check:
ifneq ($(strip $(CARGO)),)
- $(CARGO) $(RUST_VERSION) test --all-features -q --manifest-path='$(top_srcdir)/yjit/Cargo.toml'
+ $(CARGO) test --all-features -q --manifest-path='$(top_srcdir)/yjit/Cargo.toml'
endif
$(MAKE) btest RUN_OPTS='--yjit-call-threshold=1' BTESTS=-j
$(MAKE) test-all TESTS='$(top_srcdir)/test/ruby/test_yjit.rb'