author     Alan Wu <alanwu@ruby-lang.org>          2022-04-19 14:40:21 -0400
committer  Alan Wu <XrXr@users.noreply.github.com> 2022-04-27 11:00:22 -0400
commit     f90549cd38518231a6a74432fe1168c943a7cc18 (patch)
tree       c277bbfab47e230bd549bd5f607f60c3e812a714 /yjit
parent     f553180a86b71830a1de49dd04874b3880c5c698 (diff)
Rust YJIT
In December 2021, we opened an [issue] to solicit feedback regarding the porting of the YJIT codebase from C99 to Rust. There were some reservations, but this project was given the go-ahead by Ruby core developers and Matz. Since then, we have successfully completed the port of YJIT to Rust.

The new Rust version of YJIT has reached parity with the C version, in that it passes all the CRuby tests, is able to run all of the YJIT benchmarks, and performs similarly to the C version (because it works the same way and largely generates the same machine code). We've even incorporated some design improvements, such as a more fine-grained constant invalidation mechanism, which we expect will make a big difference in Ruby on Rails applications.

Because we want to be careful, YJIT is guarded behind a configure option:

```shell
./configure --enable-yjit      # Build YJIT in release mode
./configure --enable-yjit=dev  # Build YJIT in dev/debug mode
```

By default, YJIT does not get compiled and cargo/rustc is not required. If YJIT is built in dev mode, then `cargo` is used to fetch development dependencies, but when building in release mode, `cargo` is not required, only `rustc`. At the moment YJIT requires Rust 1.60.0 or newer.

The YJIT command-line options remain mostly unchanged, and more details about the build process are documented in `doc/yjit/yjit.md`.

The CI tests have been updated and do not take any more resources than before.

The development history of the Rust port is available at the following commit for interested parties:
https://github.com/Shopify/ruby/commit/1fd9573d8b4b65219f1c2407f30a0a60e537f8be

Our hope is that Rust YJIT will be compiled and included as a part of system packages and compiled binaries of the Ruby 3.2 release. We do not anticipate any major problems, as Rust is well supported on every platform which YJIT supports, but to make sure that this process works smoothly, we would like to reach out to those who take care of building system packages before the 3.2 release is shipped and resolve any issues that may come up.

[issue]: https://bugs.ruby-lang.org/issues/18481

Co-authored-by: Maxime Chevalier-Boisvert <maximechevalierb@gmail.com>
Co-authored-by: Noah Gibbs <the.codefolio.guy@gmail.com>
Co-authored-by: Kevin Newton <kddnewton@gmail.com>
Notes:
Merged: https://github.com/ruby/ruby/pull/5826
Diffstat (limited to 'yjit')
-rw-r--r--  yjit/.gitignore                    2
-rw-r--r--  yjit/Cargo.lock                   42
-rw-r--r--  yjit/Cargo.toml                   39
-rw-r--r--  yjit/bindgen/Cargo.lock          345
-rw-r--r--  yjit/bindgen/Cargo.toml            9
-rw-r--r--  yjit/bindgen/src/main.rs         286
-rw-r--r--  yjit/src/asm/mod.rs              392
-rw-r--r--  yjit/src/asm/x86_64/mod.rs      1395
-rw-r--r--  yjit/src/asm/x86_64/tests.rs     447
-rw-r--r--  yjit/src/codegen.rs             6180
-rw-r--r--  yjit/src/core.rs                2071
-rw-r--r--  yjit/src/cruby.rs                919
-rw-r--r--  yjit/src/cruby_bindings.inc.rs   783
-rw-r--r--  yjit/src/disasm.rs               218
-rw-r--r--  yjit/src/invariants.rs           585
-rw-r--r--  yjit/src/lib.rs                   17
-rw-r--r--  yjit/src/options.rs              121
-rw-r--r--  yjit/src/stats.rs                271
-rw-r--r--  yjit/src/utils.rs                205
-rw-r--r--  yjit/src/yjit.rs                  98
20 files changed, 14425 insertions, 0 deletions
diff --git a/yjit/.gitignore b/yjit/.gitignore
new file mode 100644
index 0000000000..e208d6dc85
--- /dev/null
+++ b/yjit/.gitignore
@@ -0,0 +1,2 @@
+# Build output
+target/
diff --git a/yjit/Cargo.lock b/yjit/Cargo.lock
new file mode 100644
index 0000000000..e9a59cb771
--- /dev/null
+++ b/yjit/Cargo.lock
@@ -0,0 +1,42 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "capstone"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "66b5d1f14c3539b6ff22fcb602fea5f1c4416148c8b7965a2e74860aa169b7b5"
+dependencies = [
+ "capstone-sys",
+ "libc",
+]
+
+[[package]]
+name = "capstone-sys"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df653a22d0ad34b0d91cc92a6289d96e44aac1c9a96250a094c9aeec4a91084f"
+dependencies = [
+ "cc",
+ "libc",
+]
+
+[[package]]
+name = "cc"
+version = "1.0.73"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
+
+[[package]]
+name = "libc"
+version = "0.2.124"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21a41fed9d98f27ab1c6d161da622a4fa35e8a54a8adc24bbf3ddd0ef70b0e50"
+
+[[package]]
+name = "yjit"
+version = "0.1.0"
+dependencies = [
+ "capstone",
+]
diff --git a/yjit/Cargo.toml b/yjit/Cargo.toml
new file mode 100644
index 0000000000..8969f897ad
--- /dev/null
+++ b/yjit/Cargo.toml
@@ -0,0 +1,39 @@
+# NOTE: please avoid adding dependencies on external crates as these can
+# make building and packaging YJIT more challenging.
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+[package]
+name = "yjit"
+version = "0.1.0" # YJIT version
+edition = "2021" # Rust 2021 edition to compile with
+rust-version = "1.60.0" # Minimum supported Rust version
+publish = false # Don't publish to crates.io
+
+[lib]
+crate-type = ["staticlib"]
+
+[dependencies]
+# No required dependencies, to simplify the build process. TODO: link to a
+# yet-to-be-written rationale. Optional, for development and testing purposes:
+capstone = { version = "0.10.0", optional = true }
+
+[features]
+# NOTE: Development builds select a set of these via configure.ac
+# For debugging, `make V=1` shows exact cargo invocation.
+disasm = ["capstone"]
+stats = []
+asm_comments = []
+
+[profile.dev]
+opt-level = 0
+debug = true
+debug-assertions = true
+overflow-checks = true
+
+[profile.release]
+# NOTE: --enable-yjit builds use `rustc` without going through Cargo. You
+# might want to update the `rustc` invocation if you change this profile.
+opt-level = 3
+# The extra robustness that comes from checking for arithmetic overflow is
+# worth the performance cost for the compiler.
+overflow-checks = true
diff --git a/yjit/bindgen/Cargo.lock b/yjit/bindgen/Cargo.lock
new file mode 100644
index 0000000000..cf23cbf783
--- /dev/null
+++ b/yjit/bindgen/Cargo.lock
@@ -0,0 +1,345 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "aho-corasick"
+version = "0.7.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "ansi_term"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "atty"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "bindgen"
+version = "0.59.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8"
+dependencies = [
+ "bitflags",
+ "cexpr",
+ "clang-sys",
+ "clap",
+ "env_logger",
+ "lazy_static",
+ "lazycell",
+ "log",
+ "peeking_take_while",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash",
+ "shlex",
+ "which",
+]
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "cexpr"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
+dependencies = [
+ "nom",
+]
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "clang-sys"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4cc00842eed744b858222c4c9faf7243aafc6d33f92f96935263ef4d8a41ce21"
+dependencies = [
+ "glob",
+ "libc",
+ "libloading",
+]
+
+[[package]]
+name = "clap"
+version = "2.34.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
+dependencies = [
+ "ansi_term",
+ "atty",
+ "bitflags",
+ "strsim",
+ "textwrap",
+ "unicode-width",
+ "vec_map",
+]
+
+[[package]]
+name = "either"
+version = "1.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
+
+[[package]]
+name = "env_logger"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3"
+dependencies = [
+ "atty",
+ "humantime",
+ "log",
+ "regex",
+ "termcolor",
+]
+
+[[package]]
+name = "glob"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
+
+[[package]]
+name = "hermit-abi"
+version = "0.1.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "humantime"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+
+[[package]]
+name = "lazycell"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
+
+[[package]]
+name = "libc"
+version = "0.2.124"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21a41fed9d98f27ab1c6d161da622a4fa35e8a54a8adc24bbf3ddd0ef70b0e50"
+
+[[package]]
+name = "libloading"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "efbc0f03f9a775e9f6aed295c6a1ba2253c5757a9e03d55c6caa46a681abcddd"
+dependencies = [
+ "cfg-if",
+ "winapi",
+]
+
+[[package]]
+name = "log"
+version = "0.4.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "memchr"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
+
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
+name = "nom"
+version = "7.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
+[[package]]
+name = "peeking_take_while"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec757218438d5fda206afc041538b2f6d889286160d649a86a24d37e1235afd1"
+dependencies = [
+ "unicode-xid",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "regex"
+version = "1.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.6.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
+
+[[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
+[[package]]
+name = "shlex"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
+
+[[package]]
+name = "strsim"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
+
+[[package]]
+name = "termcolor"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "textwrap"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
+dependencies = [
+ "unicode-width",
+]
+
+[[package]]
+name = "unicode-width"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973"
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
+
+[[package]]
+name = "vec_map"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
+
+[[package]]
+name = "which"
+version = "4.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c4fb54e6113b6a8772ee41c3404fb0301ac79604489467e0a9ce1f3e97c24ae"
+dependencies = [
+ "either",
+ "lazy_static",
+ "libc",
+]
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-util"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
+[[package]]
+name = "yjit-bindgen"
+version = "0.1.0"
+dependencies = [
+ "bindgen",
+]
diff --git a/yjit/bindgen/Cargo.toml b/yjit/bindgen/Cargo.toml
new file mode 100644
index 0000000000..18a8fbc463
--- /dev/null
+++ b/yjit/bindgen/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "yjit-bindgen"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+bindgen = "0.59.2"
diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs
new file mode 100644
index 0000000000..c7766a2992
--- /dev/null
+++ b/yjit/bindgen/src/main.rs
@@ -0,0 +1,286 @@
+//! See https://docs.rs/bindgen/0.59.2/bindgen/struct.Builder.html
+//! This is the binding generation tool that the YJIT cruby module talks about.
+//! More docs later once we have more experience with this; for now, check
+//! the output to make sure it looks reasonable and allowlist things you want
+//! to use in Rust.
+extern crate bindgen;
+
+use std::env;
+use std::path::PathBuf;
+
+const SRC_ROOT_ENV: &str = "YJIT_SRC_ROOT_PATH";
+
+fn main() {
+ // Path to repo is a required input for supporting running `configure`
+ // in a directory away from the code.
+ let src_root = env::var(SRC_ROOT_ENV).expect(
+ format!(
+ r#"The "{}" env var must be a path to the root of the Ruby repo"#,
+ SRC_ROOT_ENV
+ )
+ .as_ref(),
+ );
+ let src_root = PathBuf::from(src_root);
+
+ assert!(
+ src_root.is_dir(),
+ "{} must be set to a path to a directory",
+ SRC_ROOT_ENV
+ );
+
+ // Remove this flag so rust-bindgen generates bindings
+ // for internal functions that are not public in libruby
+ let filtered_clang_args = env::args().filter(|arg| arg != "-fvisibility=hidden");
+
+ let bindings = bindgen::builder()
+ .clang_args(filtered_clang_args)
+ .header("internal.h")
+ .header("internal/re.h")
+ .header("include/ruby/ruby.h")
+ .header("vm_core.h")
+ .header("vm_callinfo.h")
+
+ // Our C file for glue code
+ .header(src_root.join("yjit.c").to_str().unwrap())
+
+ // Don't want to copy over C comments
+ .generate_comments(false)
+
+ // Don't want layout tests as they are platform dependent
+ .layout_tests(false)
+
+ // Block for stability since output is different on Darwin and Linux
+ .blocklist_type("size_t")
+ .blocklist_type("fpos_t")
+
+ // Prune these types since they are system dependent and we don't use them
+ .blocklist_type("__.*")
+
+ // From include/ruby/internal/intern/string.h
+ .allowlist_function("rb_utf8_str_new")
+
+ // This struct is public to Ruby C extensions
+ // From include/ruby/internal/core/rbasic.h
+ .allowlist_type("RBasic")
+
+ // From internal.h
+ // This function prints info about a value and is useful for debugging
+ .allowlist_function("rb_obj_info_dump")
+
+ // From ruby/internal/intern/object.h
+ .allowlist_function("rb_obj_is_kind_of")
+
+ // From include/hash.h
+ .allowlist_function("rb_hash_new")
+
+ // From internal/hash.h
+ .allowlist_function("rb_hash_new_with_size")
+ .allowlist_function("rb_hash_resurrect")
+
+ // From include/ruby/internal/intern/hash.h
+ .allowlist_function("rb_hash_aset")
+ .allowlist_function("rb_hash_aref")
+ .allowlist_function("rb_hash_bulk_insert")
+
+ // From include/ruby/internal/intern/array.h
+ .allowlist_function("rb_ary_new_capa")
+ .allowlist_function("rb_ary_store")
+ .allowlist_function("rb_ary_resurrect")
+ .allowlist_function("rb_ary_clear")
+
+ // From internal/array.h
+ .allowlist_function("rb_ec_ary_new_from_values")
+ .allowlist_function("rb_ary_tmp_new_from_values")
+
+ // From include/ruby/internal/intern/class.h
+ .allowlist_function("rb_singleton_class")
+
+ // From include/ruby/internal/core/rclass.h
+ .allowlist_function("rb_class_get_superclass")
+
+ // From include/ruby/internal/intern/gc.h
+ .allowlist_function("rb_gc_mark")
+ .allowlist_function("rb_gc_mark_movable")
+ .allowlist_function("rb_gc_location")
+
+ // VALUE variables for Ruby class objects
+ // From include/ruby/internal/globals.h
+ .allowlist_var("rb_cBasicObject")
+ .allowlist_var("rb_cModule")
+ .allowlist_var("rb_cNilClass")
+ .allowlist_var("rb_cTrueClass")
+ .allowlist_var("rb_cFalseClass")
+ .allowlist_var("rb_cInteger")
+ .allowlist_var("rb_cSymbol")
+ .allowlist_var("rb_cFloat")
+ .allowlist_var("rb_cString")
+ .allowlist_var("rb_cThread")
+ .allowlist_var("rb_cArray")
+ .allowlist_var("rb_cHash")
+
+ // From ruby/internal/globals.h
+ .allowlist_var("rb_mKernel")
+
+ // From vm_callinfo.h
+ .allowlist_type("VM_CALL.*") // This doesn't work, possibly due to the odd structure of the #defines
+ .allowlist_type("vm_call_flag_bits") // So instead we include the other enum and do the bit-shift ourselves.
+ .allowlist_type("rb_call_data")
+ .blocklist_type("rb_callcache.*") // Not used yet - opaque to make it easy to import rb_call_data
+ .opaque_type("rb_callcache.*")
+ .blocklist_type("rb_callinfo_kwarg") // Contains a VALUE[] array of undefined size, so we don't import
+ .opaque_type("rb_callinfo_kwarg")
+ .allowlist_type("rb_callinfo")
+
+ // From vm_insnhelper.h
+ .allowlist_var("VM_ENV_DATA_INDEX_ME_CREF")
+ .allowlist_var("rb_block_param_proxy")
+
+ // From include/ruby/internal/intern/range.h
+ .allowlist_function("rb_range_new")
+
+ // From include/ruby/internal/symbol.h
+ .allowlist_function("rb_intern")
+ .allowlist_function("rb_id2sym")
+ .allowlist_function("rb_sym2id")
+ .allowlist_function("rb_str_intern")
+
+ // From internal/string.h
+ .allowlist_function("rb_ec_str_resurrect")
+ .allowlist_function("rb_str_concat_literals")
+ .allowlist_function("rb_obj_as_string_result")
+
+ // From include/ruby/internal/intern/parse.h
+ .allowlist_function("rb_backref_get")
+
+ // From include/ruby/internal/intern/re.h
+ .allowlist_function("rb_reg_last_match")
+ .allowlist_function("rb_reg_match_pre")
+ .allowlist_function("rb_reg_match_post")
+ .allowlist_function("rb_reg_match_last")
+ .allowlist_function("rb_reg_nth_match")
+
+ // From internal/re.h
+ .allowlist_function("rb_reg_new_ary")
+
+ // `ruby_value_type` is a C enum and this stops it from
+ // prefixing all the members with the name of the type
+ .prepend_enum_name(false)
+ .translate_enum_integer_types(true) // so we get fixed width Rust types for members
+ // From include/ruby/internal/value_type.h
+ .allowlist_type("ruby_value_type") // really old C extension API
+
+ // Autogenerated into id.h
+ .allowlist_type("ruby_method_ids")
+
+ // From method.h
+ .allowlist_type("rb_method_visibility_t")
+ .allowlist_type("rb_method_type_t")
+ .allowlist_type("method_optimized_type")
+ .allowlist_type("rb_callable_method_entry_t")
+ .allowlist_type("rb_callable_method_entry_struct")
+ .allowlist_function("rb_method_entry_at")
+ .allowlist_type("rb_method_entry_t")
+ .blocklist_type("rb_method_cfunc_t")
+ .blocklist_type("rb_method_definition_.*") // Large struct with a bitfield and union of many types - don't import (yet?)
+ .opaque_type("rb_method_definition_.*")
+
+ // From vm_core.h
+ .allowlist_var("rb_mRubyVMFrozenCore")
+ .allowlist_var("VM_BLOCK_HANDLER_NONE")
+ .allowlist_type("vm_frame_env_flags")
+ .allowlist_type("rb_seq_param_keyword_struct")
+ .allowlist_type("ruby_basic_operators")
+ .allowlist_var(".*_REDEFINED_OP_FLAG")
+ .allowlist_type("rb_num_t")
+ .allowlist_function("rb_callable_method_entry")
+ .allowlist_function("rb_vm_frame_method_entry")
+ .allowlist_type("IVC") // pointer to iseq_inline_iv_cache_entry
+ .allowlist_type("IC") // pointer to iseq_inline_constant_cache
+ .allowlist_type("iseq_inline_constant_cache_entry")
+ .blocklist_type("rb_cref_t") // don't need this directly, opaqued to allow IC import
+ .opaque_type("rb_cref_t")
+ .allowlist_type("iseq_inline_iv_cache_entry")
+ .allowlist_type("ICVARC") // pointer to iseq_inline_cvar_cache_entry
+ .allowlist_type("iseq_inline_cvar_cache_entry")
+ .blocklist_type("rb_execution_context_.*") // Large struct with various-type fields and an ifdef, so we don't import
+ .opaque_type("rb_execution_context_.*")
+ .blocklist_type("rb_control_frame_struct")
+ .opaque_type("rb_control_frame_struct")
+
+ // From yjit.c
+ .allowlist_function("rb_iseq_(get|set)_yjit_payload")
+ .allowlist_function("rb_iseq_pc_at_idx")
+ .allowlist_function("rb_iseq_opcode_at_pc")
+ .allowlist_function("rb_yjit_mark_writable")
+ .allowlist_function("rb_yjit_mark_executable")
+ .allowlist_function("rb_yjit_get_page_size")
+ .allowlist_function("rb_leaf_invokebuiltin_iseq_p")
+ .allowlist_function("rb_leaf_builtin_function")
+ .allowlist_function("rb_set_cfp_(pc|sp)")
+ .allowlist_function("rb_cfp_get_iseq")
+ .allowlist_function("rb_yjit_multi_ractor_p")
+ .allowlist_function("rb_c_method_tracing_currently_enabled")
+ .allowlist_function("rb_full_cfunc_return")
+ .allowlist_function("rb_yjit_vm_lock_then_barrier")
+ .allowlist_function("rb_yjit_vm_unlock")
+ .allowlist_function("rb_assert_(iseq|cme)_handle")
+ .allowlist_function("rb_IMEMO_TYPE_P")
+ .allowlist_function("rb_iseq_reset_jit_func")
+ .allowlist_function("rb_yjit_dump_iseq_loc")
+ .allowlist_function("rb_yjit_for_each_iseq")
+ .allowlist_function("rb_yjit_obj_written")
+
+ // from vm_sync.h
+ .allowlist_function("rb_vm_barrier")
+
+ // Not sure why it's picking these up, but don't import them.
+ .blocklist_type("FILE")
+ .blocklist_type("_IO_.*")
+
+ // From internal/compile.h
+ .allowlist_function("rb_vm_insn_decode")
+
+ // From iseq.h
+ .allowlist_function("rb_vm_insn_addr2opcode")
+ .allowlist_function("rb_iseqw_to_iseq")
+ .allowlist_function("rb_iseq_each")
+
+ // From builtin.h
+ .allowlist_type("rb_builtin_function.*")
+
+ // From internal/variable.h
+ .allowlist_function("rb_gvar_(get|set)")
+ .allowlist_function("rb_obj_ensure_iv_index_mapping")
+
+ // From include/ruby/internal/intern/variable.h
+ .allowlist_function("rb_attr_get")
+ .allowlist_function("rb_ivar_get")
+
+ // From include/ruby/internal/intern/vm.h
+ .allowlist_function("rb_get_alloc_func")
+
+ // From gc.h and internal/gc.h
+ .allowlist_function("rb_class_allocate_instance")
+ .allowlist_function("rb_obj_info")
+
+ // We define VALUE manually, don't import it
+ .blocklist_type("VALUE")
+
+ // From iseq.h
+ .opaque_type("rb_iseq_t")
+ .blocklist_type("rb_iseq_t")
+
+ // Finish the builder and generate the bindings.
+ .generate()
+ // Unwrap the Result and panic on failure.
+ .expect("Unable to generate bindings");
+
+ let mut out_path: PathBuf = src_root;
+ out_path.push("yjit");
+ out_path.push("src");
+ out_path.push("cruby_bindings.inc.rs");
+
+ bindings
+ .write_to_file(out_path)
+ .expect("Couldn't write bindings!");
+}
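For orientation, the file this tool writes (`yjit/src/cruby_bindings.inc.rs`) consists of plain `extern "C"` declarations and `#[repr(C)]` types for the allowlisted items. The snippet below is only an illustrative sketch of that general shape, not the actual generated content; the `VALUE` alias is a placeholder here, since the real definition is written by hand elsewhere in the crate (which is why `VALUE` is blocklisted above).

```rust
// Sketch of the kind of items bindgen emits; the real file is generated and
// its exact contents depend on the headers and allowlists above.
pub type VALUE = u64; // placeholder for this sketch; the crate defines VALUE manually

extern "C" {
    // From .allowlist_var("rb_cString")
    pub static mut rb_cString: VALUE;
    // From .allowlist_function("rb_hash_new")
    pub fn rb_hash_new() -> VALUE;
    // From .allowlist_function("rb_obj_is_kind_of")
    pub fn rb_obj_is_kind_of(obj: VALUE, class: VALUE) -> VALUE;
}
```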
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
new file mode 100644
index 0000000000..0d61cd654a
--- /dev/null
+++ b/yjit/src/asm/mod.rs
@@ -0,0 +1,392 @@
+use std::collections::BTreeMap;
+use std::mem;
+
+// Lots of manual vertical alignment in there that rustfmt doesn't handle well.
+#[rustfmt::skip]
+pub mod x86_64;
+
+/// Pointer to a piece of machine code
+/// We may later change this to wrap a u32
+/// Note: there is no NULL constant for CodePtr. You should use Option<CodePtr> instead.
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)]
+#[repr(C)]
+pub struct CodePtr(*const u8);
+
+impl CodePtr {
+ pub fn raw_ptr(&self) -> *const u8 {
+ let CodePtr(ptr) = *self;
+ return ptr;
+ }
+
+ fn into_i64(&self) -> i64 {
+ let CodePtr(ptr) = self;
+ *ptr as i64
+ }
+
+ fn into_usize(&self) -> usize {
+ let CodePtr(ptr) = self;
+ *ptr as usize
+ }
+}
+
+impl From<*mut u8> for CodePtr {
+ fn from(value: *mut u8) -> Self {
+ assert!(value as usize != 0);
+ return CodePtr(value);
+ }
+}
+
+/// Compute an offset in bytes of a given struct field
+macro_rules! offset_of {
+ ($struct_type:ty, $field_name:tt) => {{
+ // Null pointer to our struct type
+ let foo = (0 as *const $struct_type);
+
+ unsafe {
+ let ptr_field = (&(*foo).$field_name as *const _ as usize);
+ let ptr_base = (foo as usize);
+ ptr_field - ptr_base
+ }
+ }};
+}
+pub(crate) use offset_of;
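To make what `offset_of!` computes concrete, here is a standalone sketch with a made-up `#[repr(C)]` struct. It uses `MaybeUninit` and `addr_of!` rather than the macro's null-pointer trick, purely to keep the example free of the dummy null dereference; the pointer arithmetic is the same idea.

```rust
use std::mem::MaybeUninit;
use std::ptr::addr_of;

// Hypothetical struct for illustration only; not part of the patch.
#[repr(C)]
struct Example {
    a: u8,
    b: u64,
}

fn main() {
    // Same idea as offset_of! above: take the address of a field and
    // subtract the base address of the struct.
    let uninit = MaybeUninit::<Example>::uninit();
    let base = uninit.as_ptr();
    let field = unsafe { addr_of!((*base).b) };
    let offset = field as usize - base as usize;

    // With #[repr(C)], `b` must be 8-byte aligned, so it lands at offset 8.
    assert_eq!(offset, 8);
    println!("offset of Example.b = {offset}");
}
```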
+
+//
+// TODO: need a field_size_of macro, to compute the size of a struct field in bytes
+//
+
+// 1 is not aligned so this won't match any pages
+const ALIGNED_WRITE_POSITION_NONE: usize = 1;
+
+/// Reference to an ASM label
+struct LabelRef {
+ // Position in the code block where the label reference exists
+ pos: usize,
+
+ // Label which this refers to
+ label_idx: usize,
+}
+
+/// Block of memory into which instructions can be assembled
+pub struct CodeBlock {
+ // Block of non-executable memory used for dummy code blocks
+ // This memory is owned by this block and lives as long as the block
+ dummy_block: Vec<u8>,
+
+ // Pointer to memory we are writing into
+ mem_block: *mut u8,
+
+ // Memory block size
+ mem_size: usize,
+
+ // Current writing position
+ write_pos: usize,
+
+ // Table of registered label addresses
+ label_addrs: Vec<usize>,
+
+ // Table of registered label names
+ label_names: Vec<String>,
+
+ // References to labels
+ label_refs: Vec<LabelRef>,
+
+ // Comments for assembly instructions, if that feature is enabled
+ asm_comments: BTreeMap<usize, Vec<String>>,
+
+ // Keep track of the current aligned write position.
+ // Used for changing protection when writing to the JIT buffer
+ current_aligned_write_pos: usize,
+
+ // Memory protection works at page granularity and this is the
+ // size of each page. Used to implement W^X.
+ page_size: usize,
+
+ // Set if the CodeBlock is unable to output some instructions,
+ // for example, when there is not enough space or when a jump
+ // target is too far away.
+ dropped_bytes: bool,
+}
+
+impl CodeBlock {
+ pub fn new_dummy(mem_size: usize) -> Self {
+ // Allocate some non-executable memory
+ let mut dummy_block = vec![0; mem_size];
+ let mem_ptr = dummy_block.as_mut_ptr();
+
+ Self {
+ dummy_block: dummy_block,
+ mem_block: mem_ptr,
+ mem_size: mem_size,
+ write_pos: 0,
+ label_addrs: Vec::new(),
+ label_names: Vec::new(),
+ label_refs: Vec::new(),
+ asm_comments: BTreeMap::new(),
+ current_aligned_write_pos: ALIGNED_WRITE_POSITION_NONE,
+ page_size: 4096,
+ dropped_bytes: false,
+ }
+ }
+
+ pub fn new(mem_block: *mut u8, mem_size: usize, page_size: usize) -> Self {
+ Self {
+ dummy_block: vec![0; 0],
+ mem_block: mem_block,
+ mem_size: mem_size,
+ write_pos: 0,
+ label_addrs: Vec::new(),
+ label_names: Vec::new(),
+ label_refs: Vec::new(),
+ asm_comments: BTreeMap::new(),
+ current_aligned_write_pos: ALIGNED_WRITE_POSITION_NONE,
+ page_size,
+ dropped_bytes: false,
+ }
+ }
+
+ // Check if this code block has sufficient remaining capacity
+ pub fn has_capacity(&self, num_bytes: usize) -> bool {
+ self.write_pos + num_bytes < self.mem_size
+ }
+
+ /// Add an assembly comment if the feature is on.
+ /// If not, this becomes an inline no-op.
+ #[inline]
+ pub fn add_comment(&mut self, comment: &str) {
+ if cfg!(feature = "asm_comments") {
+ let cur_ptr = self.get_write_ptr().into_usize();
+ let this_line_comments = self.asm_comments.get(&cur_ptr);
+
+ // If there's no current list of comments for this line number, add one.
+ if this_line_comments.is_none() {
+ let new_comments = Vec::new();
+ self.asm_comments.insert(cur_ptr, new_comments);
+ }
+ let this_line_comments = self.asm_comments.get_mut(&cur_ptr).unwrap();
+
+ // Unless this comment is the same as the last one at this same line, add it.
+ let string_comment = String::from(comment);
+ if this_line_comments.last() != Some(&string_comment) {
+ this_line_comments.push(string_comment);
+ }
+ }
+ }
+
+ pub fn comments_at(&self, pos: usize) -> Option<&Vec<String>> {
+ self.asm_comments.get(&pos)
+ }
+
+ pub fn get_mem_size(&self) -> usize {
+ self.mem_size
+ }
+
+ pub fn get_write_pos(&self) -> usize {
+ self.write_pos
+ }
+
+ // Set the current write position
+ pub fn set_pos(&mut self, pos: usize) {
+ // Assert here because, while CodeBlock functions do bounds checking, there is
+ // nothing stopping users from taking out an out-of-bounds pointer and
+ // doing bad accesses with it.
+ assert!(pos < self.mem_size);
+ self.write_pos = pos;
+ }
+
+ // Align the current write pointer to a multiple of bytes
+ pub fn align_pos(&mut self, multiple: u32) {
+ // Compute the alignment boundary that is lower or equal
+ // Do everything with usize
+ let multiple: usize = multiple.try_into().unwrap();
+ let pos = self.get_write_ptr().raw_ptr() as usize;
+ let remainder = pos % multiple;
+ let prev_aligned = pos - remainder;
+
+ if prev_aligned == pos {
+ // Already aligned so do nothing
+ } else {
+ // Align by advancing
+ let pad = multiple - remainder;
+ self.set_pos(self.get_write_pos() + pad);
+ }
+ }
+
+ // Set the current write position from a pointer
+ pub fn set_write_ptr(&mut self, code_ptr: CodePtr) {
+ let pos = (code_ptr.raw_ptr() as usize) - (self.mem_block as usize);
+ self.set_pos(pos);
+ }
+
+ // Get a direct pointer into the executable memory block
+ pub fn get_ptr(&self, offset: usize) -> CodePtr {
+ unsafe {
+ let ptr = self.mem_block.offset(offset as isize);
+ CodePtr(ptr)
+ }
+ }
+
+ // Get a direct pointer to the current write position
+ pub fn get_write_ptr(&mut self) -> CodePtr {
+ self.get_ptr(self.write_pos)
+ }
+
+ // Write a single byte at the current position
+ pub fn write_byte(&mut self, byte: u8) {
+ if self.write_pos < self.mem_size {
+ self.mark_position_writable(self.write_pos);
+ unsafe { self.mem_block.add(self.write_pos).write(byte) };
+ self.write_pos += 1;
+ } else {
+ self.dropped_bytes = true;
+ }
+ }
+
+ // Read a single byte at the given position
+ pub fn read_byte(&self, pos: usize) -> u8 {
+ assert!(pos < self.mem_size);
+ unsafe { self.mem_block.add(pos).read() }
+ }
+
+ // Write multiple bytes starting from the current position
+ pub fn write_bytes(&mut self, bytes: &[u8]) {
+ for byte in bytes {
+ self.write_byte(*byte);
+ }
+ }
+
+ // Write a signed integer over a given number of bits at the current position
+ pub fn write_int(&mut self, val: u64, num_bits: u32) {
+ assert!(num_bits > 0);
+ assert!(num_bits % 8 == 0);
+
+ // Switch on the number of bits
+ match num_bits {
+ 8 => self.write_byte(val as u8),
+ 16 => self.write_bytes(&[(val & 0xff) as u8, ((val >> 8) & 0xff) as u8]),
+ 32 => self.write_bytes(&[
+ (val & 0xff) as u8,
+ ((val >> 8) & 0xff) as u8,
+ ((val >> 16) & 0xff) as u8,
+ ((val >> 24) & 0xff) as u8,
+ ]),
+ _ => {
+ let mut cur = val;
+
+ // Write out the bytes
+ for _byte in 0..(num_bits / 8) {
+ self.write_byte((cur & 0xff) as u8);
+ cur >>= 8;
+ }
+ }
+ }
+ }
+
+ /// Check if bytes have been dropped (unwritten because of insufficient space)
+ pub fn has_dropped_bytes(&self) -> bool {
+ self.dropped_bytes
+ }
+
+ /// Allocate a new label with a given name
+ pub fn new_label(&mut self, name: String) -> usize {
+ // This label doesn't have an address yet
+ self.label_addrs.push(0);
+ self.label_names.push(name);
+
+ return self.label_addrs.len() - 1;
+ }
+
+ /// Write a label at the current address
+ pub fn write_label(&mut self, label_idx: usize) {
+ // TODO: make sure that label_idx is valid
+ // TODO: add an assert here
+
+ self.label_addrs[label_idx] = self.write_pos;
+ }
+
+ // Add a label reference at the current write position
+ pub fn label_ref(&mut self, label_idx: usize) {
+ // TODO: make sure that label_idx is valid
+ // TODO: add an assert here
+
+ // Keep track of the reference
+ self.label_refs.push(LabelRef {
+ pos: self.write_pos,
+ label_idx,
+ });
+ }
+
+ // Link internal label references
+ pub fn link_labels(&mut self) {
+ let orig_pos = self.write_pos;
+
+ // For each label reference
+ for label_ref in mem::take(&mut self.label_refs) {
+ let ref_pos = label_ref.pos;
+ let label_idx = label_ref.label_idx;
+ assert!(ref_pos < self.mem_size);
+
+ let label_addr = self.label_addrs[label_idx];
+ assert!(label_addr < self.mem_size);
+
+ // Compute the offset from the reference's end to the label
+ let offset = (label_addr as i64) - ((ref_pos + 4) as i64);
+
+ self.set_pos(ref_pos);
+ self.write_int(offset as u64, 32);
+ }
+
+ self.write_pos = orig_pos;
+
+ // Clear the label positions and references
+ self.label_addrs.clear();
+ self.label_names.clear();
+ assert!(self.label_refs.is_empty());
+ }
+
+ pub fn mark_position_writable(&mut self, write_pos: usize) {
+ let page_size = self.page_size;
+ let aligned_position = (write_pos / page_size) * page_size;
+
+ if self.current_aligned_write_pos != aligned_position {
+ self.current_aligned_write_pos = aligned_position;
+
+ #[cfg(not(test))]
+ unsafe {
+ use core::ffi::c_void;
+ let page_ptr = self.get_ptr(aligned_position).raw_ptr() as *mut c_void;
+ crate::cruby::rb_yjit_mark_writable(page_ptr, page_size.try_into().unwrap());
+ }
+ }
+ }
+
+ pub fn mark_all_executable(&mut self) {
+ self.current_aligned_write_pos = ALIGNED_WRITE_POSITION_NONE;
+
+ #[cfg(not(test))]
+ unsafe {
+ use core::ffi::c_void;
+ // NOTE(alan): Right now we do allocate one big chunk and give the top half to the outlined codeblock
+ // The start of the top half of the region isn't necessarily a page boundary...
+ let cb_start = self.get_ptr(0).raw_ptr() as *mut c_void;
+ crate::cruby::rb_yjit_mark_executable(cb_start, self.mem_size.try_into().unwrap());
+ }
+ }
+}
+
+/// Wrapper struct so we can use the type system to distinguish
+/// between the inlined and outlined code blocks
+pub struct OutlinedCb {
+ // This must remain private
+ cb: CodeBlock,
+}
+
+impl OutlinedCb {
+ pub fn wrap(cb: CodeBlock) -> Self {
+ OutlinedCb { cb: cb }
+ }
+
+ pub fn unwrap(&mut self) -> &mut CodeBlock {
+ &mut self.cb
+ }
+}
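To show how `CodeBlock` and the x86_64 assembler in the next file fit together, here is a sketch in the style of the unit tests in `yjit/src/asm/x86_64/tests.rs` (the module and test names below are made up). It assembles `add rax, 7` into a dummy block and checks the emitted bytes; like the real tests it relies on `cfg(test)`, under which the page-protection FFI calls above are compiled out.

```rust
#[cfg(test)]
mod usage_sketch {
    use crate::asm::x86_64::{add, imm_opnd, RAX};
    use crate::asm::CodeBlock;

    #[test]
    fn add_rax_imm8() {
        // 1 KiB of non-executable scratch memory owned by the dummy block.
        let mut cb = CodeBlock::new_dummy(1024);

        // add rax, 7  =>  REX.W 83 /0 ib
        add(&mut cb, RAX, imm_opnd(7));

        // Read back what was written to the block.
        let bytes: Vec<u8> = (0..cb.get_write_pos())
            .map(|pos| cb.read_byte(pos))
            .collect();
        assert_eq!(bytes, vec![0x48, 0x83, 0xc0, 0x07]);
    }
}
```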
diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs
new file mode 100644
index 0000000000..c748ec1154
--- /dev/null
+++ b/yjit/src/asm/x86_64/mod.rs
@@ -0,0 +1,1395 @@
+use std::io::{Result, Write};
+use std::mem;
+use crate::asm::*;
+
+// Import the assembler tests module
+mod tests;
+
+#[derive(Clone, Copy, Debug)]
+pub struct X86Imm
+{
+ // Size in bits
+ num_bits: u8,
+
+ // The value of the immediate
+ value: i64
+}
+
+#[derive(Clone, Copy, Debug)]
+pub struct X86UImm
+{
+ // Size in bits
+ num_bits: u8,
+
+ // The value of the immediate
+ value: u64
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum RegType
+{
+ GP,
+ //FP,
+ //XMM,
+ IP,
+}
+
+#[derive(Clone, Copy, Debug)]
+pub struct X86Reg
+{
+ // Size in bits
+ num_bits: u8,
+
+ // Register type
+ reg_type: RegType,
+
+ // Register index number
+ reg_no: u8,
+}
+
+#[derive(Clone, Copy, Debug)]
+pub struct X86Mem
+{
+ // Size in bits
+ num_bits: u8,
+
+ /// Base register number
+ base_reg_no: u8,
+
+ /// Index register number
+ idx_reg_no: Option<u8>,
+
+ /// SIB scale exponent value (power of two, two bits)
+ scale_exp: u8,
+
+ /// Constant displacement from the base, not scaled
+ disp: i32,
+}
+
+#[derive(Clone, Copy, Debug)]
+pub enum X86Opnd
+{
+ // Dummy operand
+ None,
+
+ // Immediate value
+ Imm(X86Imm),
+
+ // Unsigned immediate
+ UImm(X86UImm),
+
+ // General-purpose register
+ Reg(X86Reg),
+
+ // Memory location
+ Mem(X86Mem),
+
+ // IP-relative memory location
+ IPRel(i32)
+}
+
+impl X86Opnd {
+ fn rex_needed(&self) -> bool {
+ match self {
+ X86Opnd::None => false,
+ X86Opnd::Imm(_) => false,
+ X86Opnd::UImm(_) => false,
+ X86Opnd::Reg(reg) => reg.reg_no > 7 || reg.num_bits == 8 && reg.reg_no >= 4,
+ X86Opnd::Mem(mem) => (mem.base_reg_no > 7 || (mem.idx_reg_no.unwrap_or(0) > 7)),
+ X86Opnd::IPRel(_) => false
+ }
+ }
+
+ // Check if an SIB byte is needed to encode this operand
+ fn sib_needed(&self) -> bool {
+ match self {
+ X86Opnd::Mem(mem) => {
+ mem.idx_reg_no.is_some() ||
+ mem.base_reg_no == RSP_REG_NO ||
+ mem.base_reg_no == R12_REG_NO
+ },
+ _ => false
+ }
+ }
+
+ fn disp_size(&self) -> u32 {
+ match self {
+ X86Opnd::IPRel(_) => 32,
+ X86Opnd::Mem(mem) => {
+ if mem.disp != 0 {
+ // Compute the required displacement size
+ let num_bits = sig_imm_size(mem.disp.into());
+ if num_bits > 32 {
+ panic!("displacement does not fit in 32 bits");
+ }
+
+ // x86 can only encode 8-bit and 32-bit displacements
+ if num_bits == 16 { 32 } else { 8 }
+ } else if mem.base_reg_no == RBP_REG_NO || mem.base_reg_no == R13_REG_NO {
+ // If EBP or RBP or R13 is used as the base, displacement must be encoded
+ 8
+ } else {
+ 0
+ }
+ },
+ _ => 0
+ }
+ }
+
+ pub fn num_bits(&self) -> u8 {
+ match self {
+ X86Opnd::Reg(reg) => reg.num_bits,
+ X86Opnd::Imm(imm) => imm.num_bits,
+ X86Opnd::UImm(uimm) => uimm.num_bits,
+ X86Opnd::Mem(mem) => mem.num_bits,
+ _ => unreachable!()
+ }
+ }
+}
+
+// Instruction pointer
+pub const RIP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::IP, reg_no: 5 });
+
+// 64-bit GP registers
+const RAX_REG_NO: u8 = 0;
+const RSP_REG_NO: u8 = 4;
+const RBP_REG_NO: u8 = 5;
+const R12_REG_NO: u8 = 12;
+const R13_REG_NO: u8 = 13;
+
+pub const RAX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RAX_REG_NO });
+pub const RCX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 1 });
+pub const RDX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 2 });
+pub const RBX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 3 });
+pub const RSP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RSP_REG_NO });
+pub const RBP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RBP_REG_NO });
+pub const RSI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 6 });
+pub const RDI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 7 });
+pub const R8: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 8 });
+pub const R9: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 9 });
+pub const R10: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 10 });
+pub const R11: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 11 });
+pub const R12: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: R12_REG_NO });
+pub const R13: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: R13_REG_NO });
+pub const R14: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 14 });
+pub const R15: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 15 });
+
+// 32-bit GP registers
+pub const EAX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 0 });
+pub const ECX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 1 });
+pub const EDX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 2 });
+pub const EBX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 3 });
+pub const ESP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 4 });
+pub const EBP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 5 });
+pub const ESI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 6 });
+pub const EDI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 7 });
+pub const R8D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 8 });
+pub const R9D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 9 });
+pub const R10D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 10 });
+pub const R11D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 11 });
+pub const R12D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 12 });
+pub const R13D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 13 });
+pub const R14D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 14 });
+pub const R15D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 15 });
+
+// 16-bit GP registers
+pub const AX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 0 });
+pub const CX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 1 });
+pub const DX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 2 });
+pub const BX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 3 });
+pub const SP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 4 });
+pub const BP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 5 });
+pub const SI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 6 });
+pub const DI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 7 });
+pub const R8W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 8 });
+pub const R9W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 9 });
+pub const R10W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 10 });
+pub const R11W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 11 });
+pub const R12W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 12 });
+pub const R13W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 13 });
+pub const R14W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 14 });
+pub const R15W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 15 });
+
+// 8-bit GP registers
+pub const AL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 0 });
+pub const CL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 1 });
+pub const DL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 2 });
+pub const BL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 3 });
+pub const SPL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 4 });
+pub const BPL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 5 });
+pub const SIL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 6 });
+pub const DIL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 7 });
+pub const R8B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 8 });
+pub const R9B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 9 });
+pub const R10B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 10 });
+pub const R11B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 11 });
+pub const R12B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 12 });
+pub const R13B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 13 });
+pub const R14B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 14 });
+pub const R15B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 15 });
+
+// C argument registers
+pub const C_ARG_REGS: [X86Opnd; 6] = [RDI, RSI, RDX, RCX, R8, R9];
+
+//===========================================================================
+
+/// Compute the number of bits needed to encode a signed value
+pub fn sig_imm_size(imm: i64) -> u8
+{
+ // Compute the smallest size this immediate fits in
+ if imm >= i8::MIN.into() && imm <= i8::MAX.into() {
+ return 8;
+ }
+ if imm >= i16::MIN.into() && imm <= i16::MAX.into() {
+ return 16;
+ }
+ if imm >= i32::MIN.into() && imm <= i32::MAX.into() {
+ return 32;
+ }
+
+ return 64;
+}
+
+/// Compute the number of bits needed to encode an unsigned value
+pub fn unsig_imm_size(imm: u64) -> u8
+{
+ // Compute the smallest size this immediate fits in
+ if imm <= u8::MAX.into() {
+ return 8;
+ }
+ else if imm <= u16::MAX.into() {
+ return 16;
+ }
+ else if imm <= u32::MAX.into() {
+ return 32;
+ }
+
+ return 64;
+}
+
+/// Shorthand for memory operand with base register and displacement
+pub fn mem_opnd(num_bits: u8, base_reg: X86Opnd, disp: i32) -> X86Opnd
+{
+ let base_reg = match base_reg {
+ X86Opnd::Reg(reg) => reg,
+ _ => unreachable!()
+ };
+
+ if base_reg.reg_type == RegType::IP {
+ X86Opnd::IPRel(disp)
+ } else {
+ X86Opnd::Mem(
+ X86Mem {
+ num_bits: num_bits,
+ base_reg_no: base_reg.reg_no,
+ idx_reg_no: None,
+ scale_exp: 0,
+ disp: disp,
+ }
+ )
+ }
+}
+
+/// Memory operand with SIB (Scale Index Base) indexing
+pub fn mem_opnd_sib(num_bits: u8, base_opnd: X86Opnd, index_opnd: X86Opnd, scale: i32, disp: i32) -> X86Opnd {
+ if let (X86Opnd::Reg(base_reg), X86Opnd::Reg(index_reg)) = (base_opnd, index_opnd) {
+ let scale_exp: u8;
+
+ match scale {
+ 8 => { scale_exp = 3; },
+ 4 => { scale_exp = 2; },
+ 2 => { scale_exp = 1; },
+ 1 => { scale_exp = 0; },
+ _ => unreachable!()
+ };
+
+ X86Opnd::Mem(X86Mem {
+ num_bits,
+ base_reg_no: base_reg.reg_no,
+ idx_reg_no: Some(index_reg.reg_no),
+ scale_exp,
+ disp
+ })
+ } else {
+ unreachable!()
+ }
+}
+
+/*
+// Struct member operand
+#define member_opnd(base_reg, struct_type, member_name) mem_opnd( \
+ 8 * sizeof(((struct_type*)0)->member_name), \
+ base_reg, \
+ offsetof(struct_type, member_name) \
+)
+
+// Struct member operand with an array index
+#define member_opnd_idx(base_reg, struct_type, member_name, idx) mem_opnd( \
+ 8 * sizeof(((struct_type*)0)->member_name[0]), \
+ base_reg, \
+ (offsetof(struct_type, member_name) + \
+ sizeof(((struct_type*)0)->member_name[0]) * idx) \
+)
+*/
+
+/*
+// TODO: this should be a method, X86Opnd.resize() or X86Opnd.subreg()
+static x86opnd_t resize_opnd(x86opnd_t opnd, uint32_t num_bits)
+{
+ assert (num_bits % 8 == 0);
+ x86opnd_t sub = opnd;
+ sub.num_bits = num_bits;
+ return sub;
+}
+*/
+
+pub fn imm_opnd(value: i64) -> X86Opnd
+{
+ X86Opnd::Imm(X86Imm { num_bits: sig_imm_size(value), value })
+}
+
+pub fn uimm_opnd(value: u64) -> X86Opnd
+{
+ X86Opnd::UImm(X86UImm { num_bits: unsig_imm_size(value), value })
+}
+
+pub fn const_ptr_opnd(ptr: *const u8) -> X86Opnd
+{
+ uimm_opnd(ptr as u64)
+}
+
+pub fn code_ptr_opnd(code_ptr: CodePtr) -> X86Opnd
+{
+ uimm_opnd(code_ptr.raw_ptr() as u64)
+}
+
+/// Write the REX byte
+fn write_rex(cb: &mut CodeBlock, w_flag: bool, reg_no: u8, idx_reg_no: u8, rm_reg_no: u8) {
+ // 0 1 0 0 w r x b
+ // w - 64-bit operand size flag
+ // r - MODRM.reg extension
+ // x - SIB.index extension
+ // b - MODRM.rm or SIB.base extension
+ let w: u8 = if w_flag { 1 } else { 0 };
+ let r: u8 = if (reg_no & 8) > 0 { 1 } else { 0 };
+ let x: u8 = if (idx_reg_no & 8) > 0 { 1 } else { 0 };
+ let b: u8 = if (rm_reg_no & 8) > 0 { 1 } else { 0 };
+
+ // Encode and write the REX byte
+ cb.write_byte(0x40 + (w << 3) + (r << 2) + (x << 1) + (b));
+}
+
+/// Write an opcode byte with an embedded register operand
+fn write_opcode(cb: &mut CodeBlock, opcode: u8, reg: X86Reg) {
+ let op_byte: u8 = opcode | (reg.reg_no & 7);
+ cb.write_byte(op_byte);
+}
+
+/// Encode an RM instruction
+fn write_rm(cb: &mut CodeBlock, sz_pref: bool, rex_w: bool, r_opnd: X86Opnd, rm_opnd: X86Opnd, op_ext: u8, bytes: &[u8]) {
+ let op_len = bytes.len();
+ assert!(op_len > 0 && op_len <= 3);
+ assert!(matches!(r_opnd, X86Opnd::Reg(_) | X86Opnd::None), "Can only encode an RM instruction with a register or a none");
+
+ // Flag to indicate the REX prefix is needed
+ let need_rex = rex_w || r_opnd.rex_needed() || rm_opnd.rex_needed();
+
+ // Flag to indicate SIB byte is needed
+ let need_sib = r_opnd.sib_needed() || rm_opnd.sib_needed();
+
+ // Add the operand-size prefix, if needed
+ if sz_pref {
+ cb.write_byte(0x66);
+ }
+
+ // Add the REX prefix, if needed
+ if need_rex {
+ // 0 1 0 0 w r x b
+ // w - 64-bit operand size flag
+ // r - MODRM.reg extension
+ // x - SIB.index extension
+ // b - MODRM.rm or SIB.base extension
+
+ let w = if rex_w { 1 } else { 0 };
+ let r = match r_opnd {
+ X86Opnd::None => 0,
+ X86Opnd::Reg(reg) => if (reg.reg_no & 8) > 0 { 1 } else { 0 },
+ _ => unreachable!()
+ };
+
+ let x = match (need_sib, rm_opnd) {
+ (true, X86Opnd::Mem(mem)) => if (mem.idx_reg_no.unwrap_or(0) & 8) > 0 { 1 } else { 0 },
+ _ => 0
+ };
+
+ let b = match rm_opnd {
+ X86Opnd::Reg(reg) => if (reg.reg_no & 8) > 0 { 1 } else { 0 },
+ X86Opnd::Mem(mem) => if (mem.base_reg_no & 8) > 0 { 1 } else { 0 },
+ _ => 0
+ };
+
+ // Encode and write the REX byte
+ let rex_byte: u8 = 0x40 + (w << 3) + (r << 2) + (x << 1) + (b);
+ cb.write_byte(rex_byte);
+ }
+
+ // Write the opcode bytes to the code block
+ for byte in bytes {
+ cb.write_byte(*byte)
+ }
+
+ // MODRM.mod (2 bits)
+ // MODRM.reg (3 bits)
+ // MODRM.rm (3 bits)
+
+ assert!(
+ !(op_ext != 0xff && !matches!(r_opnd, X86Opnd::None)),
+ "opcode extension and register operand present"
+ );
+
+ // Encode the mod field
+ let rm_mod = match rm_opnd {
+ X86Opnd::Reg(_) => 3,
+ X86Opnd::IPRel(_) => 0,
+ X86Opnd::Mem(_mem) => {
+ match rm_opnd.disp_size() {
+ 0 => 0,
+ 8 => 1,
+ 32 => 2,
+ _ => unreachable!()
+ }
+ },
+ _ => unreachable!()
+ };
+
+ // Encode the reg field
+ let reg: u8;
+ if op_ext != 0xff {
+ reg = op_ext;
+ } else {
+ reg = match r_opnd {
+ X86Opnd::Reg(reg) => reg.reg_no & 7,
+ _ => 0
+ };
+ }
+
+ // Encode the rm field
+ let rm = match rm_opnd {
+ X86Opnd::Reg(reg) => reg.reg_no & 7,
+ X86Opnd::Mem(mem) => if need_sib { 4 } else { mem.base_reg_no & 7 },
+ X86Opnd::IPRel(_) => 0b101,
+ _ => unreachable!()
+ };
+
+ // Encode and write the ModR/M byte
+ let rm_byte: u8 = (rm_mod << 6) + (reg << 3) + (rm);
+ cb.write_byte(rm_byte);
+
+ // Add the SIB byte, if needed
+ if need_sib {
+ // SIB.scale (2 bits)
+ // SIB.index (3 bits)
+ // SIB.base (3 bits)
+
+ match rm_opnd {
+ X86Opnd::Mem(mem) => {
+ // Encode the scale value
+ let scale = mem.scale_exp;
+
+ // Encode the index value
+ let index = mem.idx_reg_no.map(|no| no & 7).unwrap_or(4);
+
+ // Encode the base register
+ let base = mem.base_reg_no & 7;
+
+ // Encode and write the SIB byte
+ let sib_byte: u8 = (scale << 6) + (index << 3) + (base);
+ cb.write_byte(sib_byte);
+ },
+ _ => panic!("Expected mem operand")
+ }
+ }
+
+ // Add the displacement
+ match rm_opnd {
+ X86Opnd::Mem(mem) => {
+ let disp_size = rm_opnd.disp_size();
+ if disp_size > 0 {
+ cb.write_int(mem.disp as u64, disp_size);
+ }
+ },
+ X86Opnd::IPRel(rel) => {
+ cb.write_int(rel as u64, 32);
+ },
+ _ => ()
+ };
+}
+
+// Encode a mul-like single-operand RM instruction
+fn write_rm_unary(cb: &mut CodeBlock, op_mem_reg_8: u8, op_mem_reg_pref: u8, op_ext: u8, opnd: X86Opnd) {
+ assert!(matches!(opnd, X86Opnd::Reg(_) | X86Opnd::Mem(_)));
+
+ let opnd_size = opnd.num_bits();
+ assert!(opnd_size == 8 || opnd_size == 16 || opnd_size == 32 || opnd_size == 64);
+
+ if opnd_size == 8 {
+ write_rm(cb, false, false, X86Opnd::None, opnd, op_ext, &[op_mem_reg_8]);
+ } else {
+ let sz_pref = opnd_size == 16;
+ let rex_w = opnd_size == 64;
+ write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd, op_ext, &[op_mem_reg_pref]);
+ }
+}
+
+// Encode an add-like RM instruction with multiple possible encodings
+fn write_rm_multi(cb: &mut CodeBlock, op_mem_reg8: u8, op_mem_reg_pref: u8, op_reg_mem8: u8, op_reg_mem_pref: u8, op_mem_imm8: u8, op_mem_imm_sml: u8, op_mem_imm_lrg: u8, op_ext_imm: u8, opnd0: X86Opnd, opnd1: X86Opnd) {
+ assert!(matches!(opnd0, X86Opnd::Reg(_) | X86Opnd::Mem(_)));
+
+ // Check the size of opnd0
+ let opnd_size = opnd0.num_bits();
+ assert!(opnd_size == 8 || opnd_size == 16 || opnd_size == 32 || opnd_size == 64);
+
+ // Check the size of opnd1
+ match opnd1 {
+ X86Opnd::Reg(reg) => assert!(reg.num_bits == opnd_size),
+ X86Opnd::Mem(mem) => assert!(mem.num_bits == opnd_size),
+ X86Opnd::Imm(imm) => assert!(imm.num_bits <= opnd_size),
+ X86Opnd::UImm(uimm) => assert!(uimm.num_bits <= opnd_size),
+ _ => ()
+ };
+
+ let sz_pref = opnd_size == 16;
+ let rex_w = opnd_size == 64;
+
+ match (opnd0, opnd1) {
+ // R/M + Reg
+ (X86Opnd::Mem(_), X86Opnd::Reg(_)) | (X86Opnd::Reg(_), X86Opnd::Reg(_)) => {
+ if opnd_size == 8 {
+ write_rm(cb, false, false, opnd1, opnd0, 0xff, &[op_mem_reg8]);
+ } else {
+ write_rm(cb, sz_pref, rex_w, opnd1, opnd0, 0xff, &[op_mem_reg_pref]);
+ }
+ },
+ // Reg + R/M/IPRel
+ (X86Opnd::Reg(_), X86Opnd::Mem(_) | X86Opnd::IPRel(_)) => {
+ if opnd_size == 8 {
+ write_rm(cb, false, false, opnd0, opnd1, 0xff, &[op_reg_mem8]);
+ } else {
+ write_rm(cb, sz_pref, rex_w, opnd0, opnd1, 0xff, &[op_reg_mem_pref]);
+ }
+ },
+ // R/M + Imm
+ (_, X86Opnd::Imm(imm)) => {
+ if imm.num_bits <= 8 {
+ // 8-bit immediate
+
+ if opnd_size == 8 {
+ write_rm(cb, false, false, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm8]);
+ } else {
+ write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm_sml]);
+ }
+
+ cb.write_int(imm.value as u64, 8);
+ } else if imm.num_bits <= 32 {
+ // 32-bit immediate
+
+ assert!(imm.num_bits <= opnd_size);
+ write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm_lrg]);
+ cb.write_int(imm.value as u64, if opnd_size > 32 { 32 } else { opnd_size.into() });
+ } else {
+ panic!("immediate value too large");
+ }
+ },
+ // R/M + UImm
+ (_, X86Opnd::UImm(uimm)) => {
+ let num_bits = sig_imm_size(uimm.value.try_into().unwrap());
+
+ if num_bits <= 8 {
+ // 8-bit immediate
+
+ if opnd_size == 8 {
+ write_rm(cb, false, false, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm8]);
+ } else {
+ write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm_sml]);
+ }
+
+ cb.write_int(uimm.value, 8);
+ } else if num_bits <= 32 {
+ // 32-bit immediate
+
+ assert!(num_bits <= opnd_size);
+ write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm_lrg]);
+ cb.write_int(uimm.value, if opnd_size > 32 { 32 } else { opnd_size.into() });
+ } else {
+ panic!("immediate value too large");
+ }
+ },
+ _ => unreachable!()
+ };
+}
+
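+// Worked example (for illustration): for `add(cb, RAX, imm_opnd(8))` the
+// R/M + Imm arm above picks the `op_mem_imm_sml` (0x83) opcode because the
+// immediate fits in 8 bits. With REX.W for the 64-bit operand and ModRM 0xc0
+// (mod=11, reg=/0 for add, rm=RAX), the emitted bytes are 48 83 c0 08 -- the
+// same shape as the `add(cb, RSP, imm_opnd(8))` case (48 83 c4 08) exercised
+// in tests.rs.
+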
+// LOCK - lock prefix for atomic shared memory operations
+pub fn write_lock_prefix(cb: &mut CodeBlock) {
+ cb.write_byte(0xf0);
+}
+
+/// add - Integer addition
+pub fn add(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_rm_multi(
+ cb,
+ 0x00, // opMemReg8
+ 0x01, // opMemRegPref
+ 0x02, // opRegMem8
+ 0x03, // opRegMemPref
+ 0x80, // opMemImm8
+ 0x83, // opMemImmSml
+ 0x81, // opMemImmLrg
+ 0x00, // opExtImm
+ opnd0,
+ opnd1
+ );
+}
+
+/// and - Bitwise AND
+pub fn and(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_rm_multi(
+ cb,
+ 0x20, // opMemReg8
+ 0x21, // opMemRegPref
+ 0x22, // opRegMem8
+ 0x23, // opRegMemPref
+ 0x80, // opMemImm8
+ 0x83, // opMemImmSml
+ 0x81, // opMemImmLrg
+ 0x04, // opExtImm
+ opnd0,
+ opnd1
+ );
+}
+
+/// call - Call to a pointer with a 32-bit displacement offset
+pub fn call_rel32(cb: &mut CodeBlock, rel32: i32) {
+ // Write the opcode
+ cb.write_byte(0xe8);
+
+ // Write the relative 32-bit jump offset
+ cb.write_bytes(&rel32.to_le_bytes());
+}
+
+/// call - Call a pointer, encode with a 32-bit offset if possible
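+///
+/// Illustrative usage sketch (`some_c_function` is a placeholder name, not a
+/// real binding):
+///
+/// ```ignore
+/// // RAX is clobbered only if the target is out of rel32 range
+/// call_ptr(cb, RAX, some_c_function as *const u8);
+/// ```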
+pub fn call_ptr(cb: &mut CodeBlock, scratch_opnd: X86Opnd, dst_ptr: *const u8) {
+ if let X86Opnd::Reg(_scratch_reg) = scratch_opnd {
+ // Pointer to the end of this call instruction
+ let end_ptr = cb.get_ptr(cb.write_pos + 5);
+
+ // Compute the jump offset
+ let rel64: i64 = dst_ptr as i64 - end_ptr.into_i64();
+
+ // If the offset fits in 32-bit
+ if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() {
+ call_rel32(cb, rel64.try_into().unwrap());
+ return;
+ }
+
+ // Move the pointer into the scratch register and call
+ mov(cb, scratch_opnd, const_ptr_opnd(dst_ptr));
+ call(cb, scratch_opnd);
+ } else {
+ unreachable!();
+ }
+}
+
+/// call - Call to label with 32-bit offset
+pub fn call_label(cb: &mut CodeBlock, label_idx: usize) {
+ // Write the opcode
+ cb.write_byte(0xE8);
+
+ // Add a reference to the label
+ cb.label_ref(label_idx);
+
+ // Relative 32-bit offset to be patched
+ cb.write_int(0, 32);
+}
+
+/// call - Indirect call with an R/M operand
+pub fn call(cb: &mut CodeBlock, opnd: X86Opnd) {
+ write_rm(cb, false, false, X86Opnd::None, opnd, 2, &[0xff]);
+}
+
+/// Encode a conditional move instruction
+fn write_cmov(cb: &mut CodeBlock, opcode1: u8, dst: X86Opnd, src: X86Opnd) {
+ if let X86Opnd::Reg(reg) = dst {
+ match src {
+ X86Opnd::Reg(_) => (),
+ X86Opnd::Mem(_) => (),
+ _ => unreachable!()
+ };
+
+ assert!(reg.num_bits >= 16);
+ let sz_pref = reg.num_bits == 16;
+ let rex_w = reg.num_bits == 64;
+
+ write_rm(cb, sz_pref, rex_w, dst, src, 0xff, &[0x0f, opcode1]);
+ } else {
+ unreachable!()
+ }
+}
+
+// cmovcc - Conditional move
+pub fn cmova(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x47, dst, src); }
+pub fn cmovae(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x43, dst, src); }
+pub fn cmovb(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x42, dst, src); }
+pub fn cmovbe(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x46, dst, src); }
+pub fn cmovc(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x42, dst, src); }
+pub fn cmove(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x44, dst, src); }
+pub fn cmovg(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4f, dst, src); }
+pub fn cmovge(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4d, dst, src); }
+pub fn cmovl(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4c, dst, src); }
+pub fn cmovle(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4e, dst, src); }
+pub fn cmovna(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x46, dst, src); }
+pub fn cmovnae(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x42, dst, src); }
+pub fn cmovnb(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x43, dst, src); }
+pub fn cmovnbe(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x47, dst, src); }
+pub fn cmovnc(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x43, dst, src); }
+pub fn cmovne(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x45, dst, src); }
+pub fn cmovng(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4e, dst, src); }
+pub fn cmovnge(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4c, dst, src); }
+pub fn cmovnl(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4d, dst, src); }
+pub fn cmovnle(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4f, dst, src); }
+pub fn cmovno(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x41, dst, src); }
+pub fn cmovnp(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4b, dst, src); }
+pub fn cmovns(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x49, dst, src); }
+pub fn cmovnz(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x45, dst, src); }
+pub fn cmovo(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x40, dst, src); }
+pub fn cmovp(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4a, dst, src); }
+pub fn cmovpe(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4a, dst, src); }
+pub fn cmovpo(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4b, dst, src); }
+pub fn cmovs(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x48, dst, src); }
+pub fn cmovz(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x44, dst, src); }
+
+/// cmp - Compare and set flags
+pub fn cmp(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_rm_multi(
+ cb,
+ 0x38, // opMemReg8
+ 0x39, // opMemRegPref
+ 0x3A, // opRegMem8
+ 0x3B, // opRegMemPref
+ 0x80, // opMemImm8
+ 0x83, // opMemImmSml
+ 0x81, // opMemImmLrg
+ 0x07, // opExtImm
+ opnd0,
+ opnd1
+ );
+}
+
+/// cdq - Convert doubleword to quadword
+pub fn cdq(cb: &mut CodeBlock) {
+ cb.write_byte(0x99);
+}
+
+/// cqo - Convert quadword to octaword
+pub fn cqo(cb: &mut CodeBlock) {
+ cb.write_bytes(&[0x48, 0x99]);
+}
+
+/// Interrupt 3 - trap to debugger
+pub fn int3(cb: &mut CodeBlock) {
+ cb.write_byte(0xcc);
+}
+
+// Encode a relative jump to a label (direct or conditional)
+// Note: this always encodes a 32-bit offset
+fn write_jcc(cb: &mut CodeBlock, op0: u8, op1: u8, label_idx: usize) {
+ // Write the opcode
+ if op0 != 0xff {
+ cb.write_byte(op0);
+ }
+
+ cb.write_byte(op1);
+
+ // Add a reference to the label
+ cb.label_ref(label_idx);
+
+ // Relative 32-bit offset to be patched
+    cb.write_int(0, 32);
+}
+
+/// jcc - relative jumps to a label
+pub fn ja_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x87, label_idx); }
+pub fn jae_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); }
+pub fn jb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); }
+pub fn jbe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x86, label_idx); }
+pub fn jc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); }
+pub fn je_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x84, label_idx); }
+pub fn jg_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8F, label_idx); }
+pub fn jge_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8D, label_idx); }
+pub fn jl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8C, label_idx); }
+pub fn jle_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8E, label_idx); }
+pub fn jna_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x86, label_idx); }
+pub fn jnae_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); }
+pub fn jnb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); }
+pub fn jnbe_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x87, label_idx); }
+pub fn jnc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); }
+pub fn jne_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x85, label_idx); }
+pub fn jng_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8E, label_idx); }
+pub fn jnge_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8C, label_idx); }
+pub fn jnl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8D, label_idx); }
+pub fn jnle_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8F, label_idx); }
+pub fn jno_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x81, label_idx); }
+pub fn jnp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8b, label_idx); }
+pub fn jns_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x89, label_idx); }
+pub fn jnz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x85, label_idx); }
+pub fn jo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x80, label_idx); }
+pub fn jp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8A, label_idx); }
+pub fn jpe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8A, label_idx); }
+pub fn jpo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8B, label_idx); }
+pub fn js_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x88, label_idx); }
+pub fn jz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x84, label_idx); }
+pub fn jmp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0xFF, 0xE9, label_idx); }
+
+/// Encode a relative jump to a pointer at a 32-bit offset (direct or conditional)
+fn write_jcc_ptr(cb: &mut CodeBlock, op0: u8, op1: u8, dst_ptr: CodePtr) {
+ // Write the opcode
+ if op0 != 0xFF {
+ cb.write_byte(op0);
+ }
+
+ cb.write_byte(op1);
+
+ // Pointer to the end of this jump instruction
+ let end_ptr = cb.get_ptr(cb.write_pos + 4);
+
+ // Compute the jump offset
+ let rel64 = (dst_ptr.0 as i64) - (end_ptr.0 as i64);
+
+ if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() {
+ // Write the relative 32-bit jump offset
+ cb.write_int(rel64 as u64, 32);
+ }
+ else {
+ // Offset doesn't fit in 4 bytes. Report error.
+ cb.dropped_bytes = true;
+ }
+}
+
+/// jcc - relative jumps to a pointer (32-bit offset)
+pub fn ja_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x87, ptr); }
+pub fn jae_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x83, ptr); }
+pub fn jb_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x82, ptr); }
+pub fn jbe_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x86, ptr); }
+pub fn jc_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x82, ptr); }
+pub fn je_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x84, ptr); }
+pub fn jg_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8F, ptr); }
+pub fn jge_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8D, ptr); }
+pub fn jl_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8C, ptr); }
+pub fn jle_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8E, ptr); }
+pub fn jna_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x86, ptr); }
+pub fn jnae_ptr(cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x82, ptr); }
+pub fn jnb_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x83, ptr); }
+pub fn jnbe_ptr(cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x87, ptr); }
+pub fn jnc_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x83, ptr); }
+pub fn jne_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x85, ptr); }
+pub fn jng_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8E, ptr); }
+pub fn jnge_ptr(cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8C, ptr); }
+pub fn jnl_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8D, ptr); }
+pub fn jnle_ptr(cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8F, ptr); }
+pub fn jno_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x81, ptr); }
+pub fn jnp_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8b, ptr); }
+pub fn jns_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x89, ptr); }
+pub fn jnz_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x85, ptr); }
+pub fn jo_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x80, ptr); }
+pub fn jp_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8A, ptr); }
+pub fn jpe_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8A, ptr); }
+pub fn jpo_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8B, ptr); }
+pub fn js_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x88, ptr); }
+pub fn jz_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x84, ptr); }
+pub fn jmp_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0xFF, 0xE9, ptr); }
+
+/// jmp - Indirect jump near to an R/M operand.
+pub fn jmp_rm(cb: &mut CodeBlock, opnd: X86Opnd) {
+ write_rm(cb, false, false, X86Opnd::None, opnd, 4, &[0xff]);
+}
+
+// jmp - Jump with relative 32-bit offset
+pub fn jmp32(cb: &mut CodeBlock, offset: i32) {
+ cb.write_byte(0xE9);
+ cb.write_int(offset as u64, 32);
+}
+
+/// lea - Load Effective Address
+pub fn lea(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) {
+ if let X86Opnd::Reg(reg) = dst {
+ assert!(reg.num_bits == 64);
+ write_rm(cb, false, true, dst, src, 0xff, &[0x8d]);
+ } else {
+ unreachable!();
+ }
+}
+
+/// mov - Data move operation
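+///
+/// Illustrative examples (the byte sequences are the ones checked in
+/// tests.rs):
+///
+/// ```ignore
+/// mov(cb, RAX, imm_opnd(0x34));               // b8 34 00 00 00 (shrinks to mov eax, 0x34)
+/// mov(cb, RAX, imm_opnd(-1));                 // 48 b8 ff ff ff ff ff ff ff ff (needs the full 64-bit form)
+/// mov(cb, mem_opnd(64, RDI, 8), imm_opnd(1)); // 48 c7 47 08 01 00 00 00 (memory destination)
+/// ```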
+pub fn mov(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) {
+ match (dst, src) {
+ // R + Imm
+ (X86Opnd::Reg(reg), X86Opnd::Imm(imm)) => {
+ assert!(imm.num_bits <= reg.num_bits);
+
+            // If zero-extending the source immediate to 64 bits preserves its
+            // value, we can use the 32-bit operand-size version of the
+            // instruction. For example, we can turn mov(rax, 0x34) into the
+            // equivalent mov(eax, 0x34).
+ if (reg.num_bits == 64) && (imm.value > 0) && (imm.num_bits <= 32) {
+ if dst.rex_needed() {
+ write_rex(cb, false, 0, 0, reg.reg_no);
+ }
+ write_opcode(cb, 0xB8, reg);
+ cb.write_int(imm.value as u64, 32);
+ } else {
+ if reg.num_bits == 16 {
+ cb.write_byte(0x66);
+ }
+
+ if dst.rex_needed() || reg.num_bits == 64 {
+ write_rex(cb, reg.num_bits == 64, 0, 0, reg.reg_no);
+ }
+
+ write_opcode(cb, if reg.num_bits == 8 { 0xb0 } else { 0xb8 }, reg);
+ cb.write_int(imm.value as u64, reg.num_bits.into());
+ }
+ },
+ // R + UImm
+ (X86Opnd::Reg(reg), X86Opnd::UImm(uimm)) => {
+ assert!(uimm.num_bits <= reg.num_bits);
+
+            // If zero-extending the source immediate to 64 bits preserves its
+            // value, we can use the 32-bit operand-size version of the
+            // instruction. For example, we can turn mov(rax, 0x34) into the
+            // equivalent mov(eax, 0x34).
+ if (reg.num_bits == 64) && (uimm.value <= u32::MAX.into()) {
+ if dst.rex_needed() {
+ write_rex(cb, false, 0, 0, reg.reg_no);
+ }
+ write_opcode(cb, 0xB8, reg);
+ cb.write_int(uimm.value, 32);
+ } else {
+ if reg.num_bits == 16 {
+ cb.write_byte(0x66);
+ }
+
+ if dst.rex_needed() || reg.num_bits == 64 {
+ write_rex(cb, reg.num_bits == 64, 0, 0, reg.reg_no);
+ }
+
+ write_opcode(cb, if reg.num_bits == 8 { 0xb0 } else { 0xb8 }, reg);
+ cb.write_int(uimm.value, reg.num_bits.into());
+ }
+ },
+ // M + Imm
+ (X86Opnd::Mem(mem), X86Opnd::Imm(imm)) => {
+ assert!(imm.num_bits <= mem.num_bits);
+
+ if mem.num_bits == 8 {
+ write_rm(cb, false, false, X86Opnd::None, dst, 0xff, &[0xc6]);
+ } else {
+ write_rm(cb, mem.num_bits == 16, mem.num_bits == 64, X86Opnd::None, dst, 0, &[0xc7]);
+ }
+
+            let output_num_bits: u32 = if mem.num_bits > 32 { 32 } else { mem.num_bits.into() };
+ assert!(sig_imm_size(imm.value) <= (output_num_bits as u8));
+ cb.write_int(imm.value as u64, output_num_bits);
+ },
+ // M + UImm
+ (X86Opnd::Mem(mem), X86Opnd::UImm(uimm)) => {
+ assert!(uimm.num_bits <= mem.num_bits);
+
+ if mem.num_bits == 8 {
+ write_rm(cb, false, false, X86Opnd::None, dst, 0xff, &[0xc6]);
+ }
+ else {
+ write_rm(cb, mem.num_bits == 16, mem.num_bits == 64, X86Opnd::None, dst, 0, &[0xc7]);
+ }
+
+ let output_num_bits = if mem.num_bits > 32 { 32 } else { mem.num_bits.into() };
+ assert!(sig_imm_size(uimm.value as i64) <= (output_num_bits as u8));
+ cb.write_int(uimm.value, output_num_bits);
+ },
+ // * + Imm/UImm
+ (_, X86Opnd::Imm(_) | X86Opnd::UImm(_)) => unreachable!(),
+ // * + *
+ (_, _) => {
+ write_rm_multi(
+ cb,
+ 0x88, // opMemReg8
+ 0x89, // opMemRegPref
+ 0x8A, // opRegMem8
+ 0x8B, // opRegMemPref
+ 0xC6, // opMemImm8
+ 0xFF, // opMemImmSml (not available)
+ 0xFF, // opMemImmLrg
+ 0xFF, // opExtImm
+ dst,
+ src
+ );
+ }
+ };
+}
+
+/// movsx - Move with sign extension (signed integers)
+pub fn movsx(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) {
+ if let X86Opnd::Reg(_dst_reg) = dst {
+ assert!(matches!(src, X86Opnd::Reg(_) | X86Opnd::Mem(_)));
+
+ let src_num_bits = src.num_bits();
+ let dst_num_bits = dst.num_bits();
+ assert!(src_num_bits < dst_num_bits);
+
+ match src_num_bits {
+ 8 => write_rm(cb, dst_num_bits == 16, dst_num_bits == 64, dst, src, 0xff, &[0x0f, 0xbe]),
+ 16 => write_rm(cb, dst_num_bits == 16, dst_num_bits == 64, dst, src, 0xff, &[0x0f, 0xbf]),
+ 32 => write_rm(cb, false, true, dst, src, 0xff, &[0x63]),
+ _ => unreachable!()
+ };
+ } else {
+ unreachable!();
+ }
+}
+
+/*
+/// movzx - Move with zero extension (unsigned values)
+void movzx(codeblock_t *cb, x86opnd_t dst, x86opnd_t src)
+{
+ cb.writeASM("movzx", dst, src);
+
+ uint32_t dstSize;
+ if (dst.isReg)
+ dstSize = dst.reg.size;
+ else
+ assert (false, "movzx dst must be a register");
+
+ uint32_t srcSize;
+ if (src.isReg)
+ srcSize = src.reg.size;
+ else if (src.isMem)
+ srcSize = src.mem.size;
+ else
+ assert (false);
+
+ assert (
+ srcSize < dstSize,
+ "movzx: srcSize >= dstSize"
+ );
+
+ if (srcSize is 8)
+ {
+ cb.writeRMInstr!('r', 0xFF, 0x0F, 0xB6)(dstSize is 16, dstSize is 64, dst, src);
+ }
+ else if (srcSize is 16)
+ {
+ cb.writeRMInstr!('r', 0xFF, 0x0F, 0xB7)(dstSize is 16, dstSize is 64, dst, src);
+ }
+ else
+ {
+ assert (false, "invalid src operand size for movxz");
+ }
+}
+*/
+
+/// nop - Noop, one or multiple bytes long
+pub fn nop(cb: &mut CodeBlock, length: u32) {
+ match length {
+ 0 => {},
+ 1 => cb.write_byte(0x90),
+ 2 => cb.write_bytes(&[0x66, 0x90]),
+ 3 => cb.write_bytes(&[0x0f, 0x1f, 0x00]),
+ 4 => cb.write_bytes(&[0x0f, 0x1f, 0x40, 0x00]),
+ 5 => cb.write_bytes(&[0x0f, 0x1f, 0x44, 0x00, 0x00]),
+ 6 => cb.write_bytes(&[0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00]),
+ 7 => cb.write_bytes(&[0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00]),
+ 8 => cb.write_bytes(&[0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00]),
+ 9 => cb.write_bytes(&[0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00]),
+ _ => {
+ let mut written: u32 = 0;
+ while written + 9 <= length {
+ nop(cb, 9);
+ written += 9;
+ }
+ nop(cb, length - written);
+ }
+ };
+}
+
+/// not - Bitwise NOT
+pub fn not(cb: &mut CodeBlock, opnd: X86Opnd) {
+ write_rm_unary(
+ cb,
+ 0xf6, // opMemReg8
+ 0xf7, // opMemRegPref
+ 0x02, // opExt
+ opnd
+ );
+}
+
+/// or - Bitwise OR
+pub fn or(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_rm_multi(
+ cb,
+ 0x08, // opMemReg8
+ 0x09, // opMemRegPref
+ 0x0A, // opRegMem8
+ 0x0B, // opRegMemPref
+ 0x80, // opMemImm8
+ 0x83, // opMemImmSml
+ 0x81, // opMemImmLrg
+ 0x01, // opExtImm
+ opnd0,
+ opnd1
+ );
+}
+
+/// pop - Pop a register off the stack
+pub fn pop(cb: &mut CodeBlock, opnd: X86Opnd) {
+ match opnd {
+ X86Opnd::Reg(reg) => {
+ assert!(reg.num_bits == 64);
+
+ if opnd.rex_needed() {
+ write_rex(cb, false, 0, 0, reg.reg_no);
+ }
+ write_opcode(cb, 0x58, reg);
+ },
+ X86Opnd::Mem(mem) => {
+ assert!(mem.num_bits == 64);
+
+ write_rm(cb, false, false, X86Opnd::None, opnd, 0, &[0x8f]);
+ },
+ _ => unreachable!()
+ };
+}
+
+/// popfq - Pop the flags register (64-bit)
+pub fn popfq(cb: &mut CodeBlock) {
+ // REX.W + 0x9D
+ cb.write_bytes(&[0x48, 0x9d]);
+}
+
+/// push - Push an operand on the stack
+pub fn push(cb: &mut CodeBlock, opnd: X86Opnd) {
+ match opnd {
+ X86Opnd::Reg(reg) => {
+ if opnd.rex_needed() {
+ write_rex(cb, false, 0, 0, reg.reg_no);
+ }
+ write_opcode(cb, 0x50, reg);
+ },
+ X86Opnd::Mem(_mem) => {
+ write_rm(cb, false, false, X86Opnd::None, opnd, 6, &[0xff]);
+ },
+ _ => unreachable!()
+ }
+}
+
+/// pushfq - Push the flags register (64-bit)
+pub fn pushfq(cb: &mut CodeBlock) {
+ cb.write_byte(0x9C);
+}
+
+/// ret - Return from call, popping only the return address
+pub fn ret(cb: &mut CodeBlock) {
+ cb.write_byte(0xC3);
+}
+
+// Encode a single-operand shift instruction
+fn write_shift(cb: &mut CodeBlock, op_mem_one_pref: u8, _op_mem_cl_pref: u8, op_mem_imm_pref: u8, op_ext: u8, opnd0: X86Opnd, opnd1: X86Opnd) {
+ assert!(matches!(opnd0, X86Opnd::Reg(_) | X86Opnd::Mem(_)));
+
+ // Check the size of opnd0
+ let opnd_size = opnd0.num_bits();
+ assert!(opnd_size == 16 || opnd_size == 32 || opnd_size == 64);
+
+ let sz_pref = opnd_size == 16;
+ let rex_w = opnd_size == 64;
+
+ if let X86Opnd::UImm(imm) = opnd1 {
+ if imm.value == 1 {
+ write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext, &[op_mem_one_pref]);
+ } else {
+ assert!(imm.num_bits <= 8);
+ write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext, &[op_mem_imm_pref]);
+ cb.write_byte(imm.value as u8);
+ }
+ } else {
+ unreachable!();
+ }
+}
+
+/// sal - Shift arithmetic left
+pub fn sal(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_shift(
+ cb,
+ 0xD1, // opMemOnePref,
+ 0xD3, // opMemClPref,
+ 0xC1, // opMemImmPref,
+ 0x04,
+ opnd0,
+ opnd1
+ );
+}
+
+/// sar - Shift arithmetic right (signed)
+pub fn sar(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_shift(
+ cb,
+ 0xD1, // opMemOnePref,
+ 0xD3, // opMemClPref,
+ 0xC1, // opMemImmPref,
+ 0x07,
+ opnd0,
+ opnd1
+ );
+}
+
+/// shl - Shift logical left
+pub fn shl(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_shift(
+ cb,
+ 0xD1, // opMemOnePref,
+ 0xD3, // opMemClPref,
+ 0xC1, // opMemImmPref,
+ 0x04,
+ opnd0,
+ opnd1
+ );
+}
+
+/// shr - Shift logical right (unsigned)
+pub fn shr(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_shift(
+ cb,
+ 0xD1, // opMemOnePref,
+ 0xD3, // opMemClPref,
+ 0xC1, // opMemImmPref,
+ 0x05,
+ opnd0,
+ opnd1
+ );
+}
+
+/// sub - Integer subtraction
+pub fn sub(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_rm_multi(
+ cb,
+ 0x28, // opMemReg8
+ 0x29, // opMemRegPref
+ 0x2A, // opRegMem8
+ 0x2B, // opRegMemPref
+ 0x80, // opMemImm8
+ 0x83, // opMemImmSml
+ 0x81, // opMemImmLrg
+ 0x05, // opExtImm
+ opnd0,
+ opnd1
+ );
+}
+
+fn resize_opnd(opnd: X86Opnd, num_bits: u8) -> X86Opnd {
+ match opnd {
+ X86Opnd::Reg(reg) => {
+ let mut cloned = reg.clone();
+ cloned.num_bits = num_bits;
+ X86Opnd::Reg(cloned)
+ },
+ X86Opnd::Mem(mem) => {
+ let mut cloned = mem.clone();
+ cloned.num_bits = num_bits;
+ X86Opnd::Mem(cloned)
+ },
+ _ => unreachable!()
+ }
+}
+
+/// test - Logical Compare
+pub fn test(cb: &mut CodeBlock, rm_opnd: X86Opnd, test_opnd: X86Opnd) {
+ assert!(matches!(rm_opnd, X86Opnd::Reg(_) | X86Opnd::Mem(_)));
+ let rm_num_bits = rm_opnd.num_bits();
+
+ match test_opnd {
+ X86Opnd::UImm(uimm) => {
+ assert!(uimm.num_bits <= 32);
+ assert!(uimm.num_bits <= rm_num_bits);
+
+ // Use the smallest operand size possible
+ assert!(rm_num_bits % 8 == 0);
+ let rm_resized = resize_opnd(rm_opnd, uimm.num_bits);
+
+ if uimm.num_bits == 8 {
+ write_rm(cb, false, false, X86Opnd::None, rm_resized, 0x00, &[0xf6]);
+ cb.write_int(uimm.value, uimm.num_bits.into());
+ } else {
+ write_rm(cb, uimm.num_bits == 16, false, X86Opnd::None, rm_resized, 0x00, &[0xf7]);
+ cb.write_int(uimm.value, uimm.num_bits.into());
+ }
+ },
+ X86Opnd::Imm(imm) => {
+ // This mode only applies to 64-bit R/M operands with 32-bit signed immediates
+ assert!(imm.num_bits <= 32);
+ assert!(rm_num_bits == 64);
+
+ write_rm(cb, false, true, X86Opnd::None, rm_opnd, 0x00, &[0xf7]);
+ cb.write_int(imm.value as u64, 32);
+ },
+ X86Opnd::Reg(reg) => {
+ assert!(reg.num_bits == rm_num_bits);
+
+ if rm_num_bits == 8 {
+ write_rm(cb, false, false, test_opnd, rm_opnd, 0xff, &[0x84]);
+ } else {
+ write_rm(cb, rm_num_bits == 16, rm_num_bits == 64, test_opnd, rm_opnd, 0xff, &[0x85]);
+ }
+ },
+ _ => unreachable!()
+ };
+}
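+
+// Note (illustrative): because the R/M operand is resized to the width of the
+// unsigned immediate above, `test(cb, RCX, uimm_opnd(8))` emits the same bytes
+// as `test(cb, CL, uimm_opnd(8))` (f6 c1 08), as checked in tests.rs.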
+
+/// ud2 - Undefined instruction, triggers an invalid opcode exception
+pub fn ud2(cb: &mut CodeBlock) {
+ cb.write_bytes(&[0x0f, 0x0b]);
+}
+
+/// xchg - Exchange Register/Memory with Register
+pub fn xchg(cb: &mut CodeBlock, rm_opnd: X86Opnd, r_opnd: X86Opnd) {
+ if let (X86Opnd::Reg(rm_reg), X86Opnd::Reg(r_reg)) = (rm_opnd, r_opnd) {
+ assert!(rm_reg.num_bits == 64);
+ assert!(r_reg.num_bits == 64);
+
+ // If we're exchanging with RAX
+ if rm_reg.reg_no == RAX_REG_NO {
+ // Write the REX byte
+ write_rex(cb, true, 0, 0, r_reg.reg_no);
+
+ // Write the opcode and register number
+ cb.write_byte(0x90 + (r_reg.reg_no & 7));
+ } else {
+ write_rm(cb, false, true, r_opnd, rm_opnd, 0xff, &[0x87]);
+ }
+ } else {
+ unreachable!();
+ }
+}
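+
+// Note (illustrative): exchanging with RAX uses the short 0x90+r form, so
+// `xchg(cb, RAX, RCX)` encodes as 48 91, while `xchg(cb, RCX, RBX)` needs the
+// general 0x87 form and encodes as 48 87 d9 (see tests.rs).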
+
+/// xor - Exclusive bitwise OR
+pub fn xor(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_rm_multi(
+ cb,
+ 0x30, // opMemReg8
+ 0x31, // opMemRegPref
+ 0x32, // opRegMem8
+ 0x33, // opRegMemPref
+ 0x80, // opMemImm8
+ 0x83, // opMemImmSml
+ 0x81, // opMemImmLrg
+ 0x06, // opExtImm
+ opnd0,
+ opnd1
+ );
+}
diff --git a/yjit/src/asm/x86_64/tests.rs b/yjit/src/asm/x86_64/tests.rs
new file mode 100644
index 0000000000..bb36468a34
--- /dev/null
+++ b/yjit/src/asm/x86_64/tests.rs
@@ -0,0 +1,447 @@
+#![cfg(test)]
+
+use crate::asm::x86_64::*;
+use std::fmt;
+
+/// Produce hex string output from the bytes in a code block
+impl<'a> fmt::LowerHex for super::CodeBlock {
+ fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
+ for pos in 0..self.write_pos {
+ let byte = self.read_byte(pos);
+ fmtr.write_fmt(format_args!("{:02x}", byte))?;
+ }
+ Ok(())
+ }
+}
+
+/// Check that the bytes for an instruction sequence match a hex string
+fn check_bytes<R>(bytes: &str, run: R) where R: FnOnce(&mut super::CodeBlock) {
+ let mut cb = super::CodeBlock::new_dummy(4096);
+ run(&mut cb);
+ assert_eq!(format!("{:x}", cb), bytes);
+}
+
+#[test]
+fn test_add() {
+ check_bytes("80c103", |cb| add(cb, CL, imm_opnd(3)));
+ check_bytes("00d9", |cb| add(cb, CL, BL));
+ check_bytes("4000e1", |cb| add(cb, CL, SPL));
+ check_bytes("6601d9", |cb| add(cb, CX, BX));
+ check_bytes("4801d8", |cb| add(cb, RAX, RBX));
+ check_bytes("01d1", |cb| add(cb, ECX, EDX));
+ check_bytes("4c01f2", |cb| add(cb, RDX, R14));
+ check_bytes("480110", |cb| add(cb, mem_opnd(64, RAX, 0), RDX));
+ check_bytes("480310", |cb| add(cb, RDX, mem_opnd(64, RAX, 0)));
+ check_bytes("48035008", |cb| add(cb, RDX, mem_opnd(64, RAX, 8)));
+ check_bytes("480390ff000000", |cb| add(cb, RDX, mem_opnd(64, RAX, 255)));
+ check_bytes("4881407fff000000", |cb| add(cb, mem_opnd(64, RAX, 127), imm_opnd(255)));
+ check_bytes("0110", |cb| add(cb, mem_opnd(32, RAX, 0), EDX));
+ check_bytes("4883c408", |cb| add(cb, RSP, imm_opnd(8)));
+ check_bytes("83c108", |cb| add(cb, ECX, imm_opnd(8)));
+ check_bytes("81c1ff000000", |cb| add(cb, ECX, imm_opnd(255)));
+}
+
+#[test]
+fn test_add_unsigned() {
+ // ADD r/m8, imm8
+ check_bytes("4180c001", |cb| add(cb, R8B, uimm_opnd(1)));
+ check_bytes("4180c07f", |cb| add(cb, R8B, imm_opnd(i8::MAX.try_into().unwrap())));
+
+ // ADD r/m16, imm16
+ check_bytes("664183c001", |cb| add(cb, R8W, uimm_opnd(1)));
+ check_bytes("664181c0ff7f", |cb| add(cb, R8W, uimm_opnd(i16::MAX.try_into().unwrap())));
+
+ // ADD r/m32, imm32
+ check_bytes("4183c001", |cb| add(cb, R8D, uimm_opnd(1)));
+ check_bytes("4181c0ffffff7f", |cb| add(cb, R8D, uimm_opnd(i32::MAX.try_into().unwrap())));
+
+ // ADD r/m64, imm32
+ check_bytes("4983c001", |cb| add(cb, R8, uimm_opnd(1)));
+ check_bytes("4981c0ffffff7f", |cb| add(cb, R8, uimm_opnd(i32::MAX.try_into().unwrap())));
+}
+
+#[test]
+fn test_and() {
+ check_bytes("4421e5", |cb| and(cb, EBP, R12D));
+ check_bytes("48832008", |cb| and(cb, mem_opnd(64, RAX, 0), imm_opnd(0x08)));
+}
+
+#[test]
+fn test_call_label() {
+ check_bytes("e8fbffffff", |cb| {
+ let label_idx = cb.new_label("fn".to_owned());
+ call_label(cb, label_idx);
+ cb.link_labels();
+ });
+}
+
+#[test]
+fn test_call_ptr() {
+ // calling a lower address
+ check_bytes("e8fbffffff", |cb| {
+ let ptr = cb.get_write_ptr();
+ call_ptr(cb, RAX, ptr.raw_ptr());
+ });
+}
+
+#[test]
+fn test_call_reg() {
+ check_bytes("ffd0", |cb| call(cb, RAX));
+}
+
+#[test]
+fn test_call_mem() {
+ check_bytes("ff542408", |cb| call(cb, mem_opnd(64, RSP, 8)));
+}
+
+#[test]
+fn test_cmovcc() {
+ check_bytes("0f4ff7", |cb| cmovg(cb, ESI, EDI));
+ check_bytes("0f4f750c", |cb| cmovg(cb, ESI, mem_opnd(32, RBP, 12)));
+ check_bytes("0f4cc1", |cb| cmovl(cb, EAX, ECX));
+ check_bytes("480f4cdd", |cb| cmovl(cb, RBX, RBP));
+ check_bytes("0f4e742404", |cb| cmovle(cb, ESI, mem_opnd(32, RSP, 4)));
+}
+
+#[test]
+fn test_cmp() {
+ check_bytes("38d1", |cb| cmp(cb, CL, DL));
+ check_bytes("39f9", |cb| cmp(cb, ECX, EDI));
+ check_bytes("493b1424", |cb| cmp(cb, RDX, mem_opnd(64, R12, 0)));
+ check_bytes("4883f802", |cb| cmp(cb, RAX, imm_opnd(2)));
+}
+
+#[test]
+fn test_cqo() {
+ check_bytes("4899", |cb| cqo(cb));
+}
+
+#[test]
+fn test_jge_label() {
+ check_bytes("0f8dfaffffff", |cb| {
+ let label_idx = cb.new_label("loop".to_owned());
+ jge_label(cb, label_idx);
+ cb.link_labels();
+ });
+}
+
+#[test]
+fn test_jmp_label() {
+ // Forward jump
+ check_bytes("e900000000", |cb| {
+ let label_idx = cb.new_label("next".to_owned());
+ jmp_label(cb, label_idx);
+ cb.write_label(label_idx);
+ cb.link_labels();
+ });
+
+ // Backwards jump
+ check_bytes("e9fbffffff", |cb| {
+ let label_idx = cb.new_label("loop".to_owned());
+ cb.write_label(label_idx);
+ jmp_label(cb, label_idx);
+ cb.link_labels();
+ });
+}
+
+#[test]
+fn test_jmp_rm() {
+ check_bytes("41ffe4", |cb| jmp_rm(cb, R12));
+}
+
+#[test]
+fn test_jo_label() {
+ check_bytes("0f80faffffff", |cb| {
+ let label_idx = cb.new_label("loop".to_owned());
+ jo_label(cb, label_idx);
+ cb.link_labels();
+ });
+}
+
+#[test]
+fn test_lea() {
+ check_bytes("488d5108", |cb| lea(cb, RDX, mem_opnd(64, RCX, 8)));
+ check_bytes("488d0500000000", |cb| lea(cb, RAX, mem_opnd(8, RIP, 0)));
+ check_bytes("488d0505000000", |cb| lea(cb, RAX, mem_opnd(8, RIP, 5)));
+ check_bytes("488d3d05000000", |cb| lea(cb, RDI, mem_opnd(8, RIP, 5)));
+}
+
+#[test]
+fn test_mov() {
+ check_bytes("b807000000", |cb| mov(cb, EAX, imm_opnd(7)));
+ check_bytes("b8fdffffff", |cb| mov(cb, EAX, imm_opnd(-3)));
+ check_bytes("41bf03000000", |cb| mov(cb, R15, imm_opnd(3)));
+ check_bytes("89d8", |cb| mov(cb, EAX, EBX));
+ check_bytes("89c8", |cb| mov(cb, EAX, ECX));
+ check_bytes("8b9380000000", |cb| mov(cb, EDX, mem_opnd(32, RBX, 128)));
+ check_bytes("488b442404", |cb| mov(cb, RAX, mem_opnd(64, RSP, 4)));
+
+ // Test `mov rax, 3` => `mov eax, 3` optimization
+ check_bytes("41b834000000", |cb| mov(cb, R8, imm_opnd(0x34)));
+ check_bytes("49b80000008000000000", |cb| mov(cb, R8, imm_opnd(0x80000000)));
+ check_bytes("49b8ffffffffffffffff", |cb| mov(cb, R8, imm_opnd(-1)));
+
+ check_bytes("b834000000", |cb| mov(cb, RAX, imm_opnd(0x34)));
+ check_bytes("48b8020000000000c0ff", |cb| mov(cb, RAX, imm_opnd(-18014398509481982)));
+ check_bytes("48b80000008000000000", |cb| mov(cb, RAX, imm_opnd(0x80000000)));
+ check_bytes("48b8ccffffffffffffff", |cb| mov(cb, RAX, imm_opnd(-52))); // yasm thinks this could use a dword immediate instead of qword
+ check_bytes("48b8ffffffffffffffff", |cb| mov(cb, RAX, imm_opnd(-1))); // yasm thinks this could use a dword immediate instead of qword
+ check_bytes("4488c9", |cb| mov(cb, CL, R9B));
+ check_bytes("4889c3", |cb| mov(cb, RBX, RAX));
+ check_bytes("4889df", |cb| mov(cb, RDI, RBX));
+ check_bytes("40b60b", |cb| mov(cb, SIL, imm_opnd(11)));
+
+ check_bytes("c60424fd", |cb| mov(cb, mem_opnd(8, RSP, 0), imm_opnd(-3)));
+ check_bytes("48c7470801000000", |cb| mov(cb, mem_opnd(64, RDI, 8), imm_opnd(1)));
+ //check_bytes("67c7400411000000", |cb| mov(cb, mem_opnd(32, EAX, 4), imm_opnd(0x34))); // We don't distinguish between EAX and RAX here - that's probably fine?
+ check_bytes("c7400411000000", |cb| mov(cb, mem_opnd(32, RAX, 4), imm_opnd(17)));
+ check_bytes("41895814", |cb| mov(cb, mem_opnd(32, R8, 20), EBX));
+ check_bytes("4d8913", |cb| mov(cb, mem_opnd(64, R11, 0), R10));
+ check_bytes("48c742f8f4ffffff", |cb| mov(cb, mem_opnd(64, RDX, -8), imm_opnd(-12)));
+}
+
+#[test]
+fn test_mov_unsigned() {
+ // MOV AL, imm8
+ check_bytes("b001", |cb| mov(cb, AL, uimm_opnd(1)));
+ check_bytes("b0ff", |cb| mov(cb, AL, uimm_opnd(u8::MAX.into())));
+
+ // MOV AX, imm16
+ check_bytes("66b80100", |cb| mov(cb, AX, uimm_opnd(1)));
+ check_bytes("66b8ffff", |cb| mov(cb, AX, uimm_opnd(u16::MAX.into())));
+
+ // MOV EAX, imm32
+ check_bytes("b801000000", |cb| mov(cb, EAX, uimm_opnd(1)));
+ check_bytes("b8ffffffff", |cb| mov(cb, EAX, uimm_opnd(u32::MAX.into())));
+ check_bytes("41b800000000", |cb| mov(cb, R8, uimm_opnd(0)));
+ check_bytes("41b8ffffffff", |cb| mov(cb, R8, uimm_opnd(0xFF_FF_FF_FF)));
+
+ // MOV RAX, imm64, will move down into EAX since it fits into 32 bits
+ check_bytes("b801000000", |cb| mov(cb, RAX, uimm_opnd(1)));
+ check_bytes("b8ffffffff", |cb| mov(cb, RAX, uimm_opnd(u32::MAX.into())));
+
+ // MOV RAX, imm64, will not move down into EAX since it does not fit into 32 bits
+ check_bytes("48b80000000001000000", |cb| mov(cb, RAX, uimm_opnd(u32::MAX as u64 + 1)));
+ check_bytes("48b8ffffffffffffffff", |cb| mov(cb, RAX, uimm_opnd(u64::MAX.into())));
+ check_bytes("49b8ffffffffffffffff", |cb| mov(cb, R8, uimm_opnd(u64::MAX)));
+
+ // MOV r8, imm8
+ check_bytes("41b001", |cb| mov(cb, R8B, uimm_opnd(1)));
+ check_bytes("41b0ff", |cb| mov(cb, R8B, uimm_opnd(u8::MAX.into())));
+
+ // MOV r16, imm16
+ check_bytes("6641b80100", |cb| mov(cb, R8W, uimm_opnd(1)));
+ check_bytes("6641b8ffff", |cb| mov(cb, R8W, uimm_opnd(u16::MAX.into())));
+
+ // MOV r32, imm32
+ check_bytes("41b801000000", |cb| mov(cb, R8D, uimm_opnd(1)));
+ check_bytes("41b8ffffffff", |cb| mov(cb, R8D, uimm_opnd(u32::MAX.into())));
+
+    // MOV r64, imm64, will move down into a 32-bit register since it fits into 32 bits
+ check_bytes("41b801000000", |cb| mov(cb, R8, uimm_opnd(1)));
+
+    // MOV r64, imm64, will not move down into a 32-bit register since it does not fit into 32 bits
+ check_bytes("49b8ffffffffffffffff", |cb| mov(cb, R8, uimm_opnd(u64::MAX)));
+}
+
+#[test]
+fn test_mov_iprel() {
+ check_bytes("8b0500000000", |cb| mov(cb, EAX, mem_opnd(32, RIP, 0)));
+ check_bytes("8b0505000000", |cb| mov(cb, EAX, mem_opnd(32, RIP, 5)));
+
+ check_bytes("488b0500000000", |cb| mov(cb, RAX, mem_opnd(64, RIP, 0)));
+ check_bytes("488b0505000000", |cb| mov(cb, RAX, mem_opnd(64, RIP, 5)));
+ check_bytes("488b3d05000000", |cb| mov(cb, RDI, mem_opnd(64, RIP, 5)));
+}
+
+#[test]
+fn test_movsx() {
+ check_bytes("660fbec0", |cb| movsx(cb, AX, AL));
+ check_bytes("0fbed0", |cb| movsx(cb, EDX, AL));
+ check_bytes("480fbec3", |cb| movsx(cb, RAX, BL));
+ check_bytes("0fbfc8", |cb| movsx(cb, ECX, AX));
+ check_bytes("4c0fbed9", |cb| movsx(cb, R11, CL));
+ check_bytes("4c6354240c", |cb| movsx(cb, R10, mem_opnd(32, RSP, 12)));
+ check_bytes("480fbe0424", |cb| movsx(cb, RAX, mem_opnd(8, RSP, 0)));
+ check_bytes("490fbf5504", |cb| movsx(cb, RDX, mem_opnd(16, R13, 4)));
+}
+
+#[test]
+fn test_nop() {
+ check_bytes("90", |cb| nop(cb, 1));
+ check_bytes("6690", |cb| nop(cb, 2));
+ check_bytes("0f1f00", |cb| nop(cb, 3));
+ check_bytes("0f1f4000", |cb| nop(cb, 4));
+ check_bytes("0f1f440000", |cb| nop(cb, 5));
+ check_bytes("660f1f440000", |cb| nop(cb, 6));
+ check_bytes("0f1f8000000000", |cb| nop(cb, 7));
+ check_bytes("0f1f840000000000", |cb| nop(cb, 8));
+ check_bytes("660f1f840000000000", |cb| nop(cb, 9));
+ check_bytes("660f1f84000000000090", |cb| nop(cb, 10));
+ check_bytes("660f1f8400000000006690", |cb| nop(cb, 11));
+ check_bytes("660f1f8400000000000f1f00", |cb| nop(cb, 12));
+}
+
+#[test]
+fn test_not() {
+ check_bytes("66f7d0", |cb| not(cb, AX));
+ check_bytes("f7d0", |cb| not(cb, EAX));
+ check_bytes("49f71424", |cb| not(cb, mem_opnd(64, R12, 0)));
+ check_bytes("f794242d010000", |cb| not(cb, mem_opnd(32, RSP, 301)));
+ check_bytes("f71424", |cb| not(cb, mem_opnd(32, RSP, 0)));
+ check_bytes("f7542403", |cb| not(cb, mem_opnd(32, RSP, 3)));
+ check_bytes("f75500", |cb| not(cb, mem_opnd(32, RBP, 0)));
+ check_bytes("f7550d", |cb| not(cb, mem_opnd(32, RBP, 13)));
+ check_bytes("48f7d0", |cb| not(cb, RAX));
+ check_bytes("49f7d3", |cb| not(cb, R11));
+ check_bytes("f710", |cb| not(cb, mem_opnd(32, RAX, 0)));
+ check_bytes("f716", |cb| not(cb, mem_opnd(32, RSI, 0)));
+ check_bytes("f717", |cb| not(cb, mem_opnd(32, RDI, 0)));
+ check_bytes("f75237", |cb| not(cb, mem_opnd(32, RDX, 55)));
+ check_bytes("f79239050000", |cb| not(cb, mem_opnd(32, RDX, 1337)));
+ check_bytes("f752c9", |cb| not(cb, mem_opnd(32, RDX, -55)));
+ check_bytes("f792d5fdffff", |cb| not(cb, mem_opnd(32, RDX, -555)));
+}
+
+#[test]
+fn test_or() {
+ check_bytes("09f2", |cb| or(cb, EDX, ESI));
+}
+
+#[test]
+fn test_pop() {
+ check_bytes("58", |cb| pop(cb, RAX));
+ check_bytes("5b", |cb| pop(cb, RBX));
+ check_bytes("5c", |cb| pop(cb, RSP));
+ check_bytes("5d", |cb| pop(cb, RBP));
+ check_bytes("415c", |cb| pop(cb, R12));
+ check_bytes("8f00", |cb| pop(cb, mem_opnd(64, RAX, 0)));
+ check_bytes("418f00", |cb| pop(cb, mem_opnd(64, R8, 0)));
+ check_bytes("418f4003", |cb| pop(cb, mem_opnd(64, R8, 3)));
+ check_bytes("8f44c803", |cb| pop(cb, mem_opnd_sib(64, RAX, RCX, 8, 3)));
+ check_bytes("418f44c803", |cb| pop(cb, mem_opnd_sib(64, R8, RCX, 8, 3)));
+}
+
+#[test]
+fn test_push() {
+ check_bytes("50", |cb| push(cb, RAX));
+ check_bytes("53", |cb| push(cb, RBX));
+ check_bytes("4154", |cb| push(cb, R12));
+ check_bytes("ff30", |cb| push(cb, mem_opnd(64, RAX, 0)));
+ check_bytes("41ff30", |cb| push(cb, mem_opnd(64, R8, 0)));
+ check_bytes("41ff7003", |cb| push(cb, mem_opnd(64, R8, 3)));
+ check_bytes("ff74c803", |cb| push(cb, mem_opnd_sib(64, RAX, RCX, 8, 3)));
+ check_bytes("41ff74c803", |cb| push(cb, mem_opnd_sib(64, R8, RCX, 8, 3)));
+}
+
+#[test]
+fn test_ret() {
+ check_bytes("c3", |cb| ret(cb));
+}
+
+#[test]
+fn test_sal() {
+ check_bytes("66d1e1", |cb| sal(cb, CX, uimm_opnd(1)));
+ check_bytes("d1e1", |cb| sal(cb, ECX, uimm_opnd(1)));
+ check_bytes("c1e505", |cb| sal(cb, EBP, uimm_opnd(5)));
+ check_bytes("d1642444", |cb| sal(cb, mem_opnd(32, RSP, 68), uimm_opnd(1)));
+}
+
+#[test]
+fn test_sar() {
+ check_bytes("d1fa", |cb| sar(cb, EDX, uimm_opnd(1)));
+}
+
+#[test]
+fn test_shr() {
+ check_bytes("49c1ee07", |cb| shr(cb, R14, uimm_opnd(7)));
+}
+
+#[test]
+fn test_sub() {
+ check_bytes("83e801", |cb| sub(cb, EAX, imm_opnd(1)));
+ check_bytes("4883e802", |cb| sub(cb, RAX, imm_opnd(2)));
+}
+
+#[test]
+fn test_test() {
+ check_bytes("84c0", |cb| test(cb, AL, AL));
+ check_bytes("6685c0", |cb| test(cb, AX, AX));
+ check_bytes("f6c108", |cb| test(cb, CL, uimm_opnd(8)));
+ check_bytes("f6c207", |cb| test(cb, DL, uimm_opnd(7)));
+ check_bytes("f6c108", |cb| test(cb, RCX, uimm_opnd(8)));
+ check_bytes("f6420808", |cb| test(cb, mem_opnd(8, RDX, 8), uimm_opnd(8)));
+ check_bytes("f64208ff", |cb| test(cb, mem_opnd(8, RDX, 8), uimm_opnd(255)));
+ check_bytes("66f7c2ffff", |cb| test(cb, DX, uimm_opnd(0xffff)));
+ check_bytes("66f74208ffff", |cb| test(cb, mem_opnd(16, RDX, 8), uimm_opnd(0xffff)));
+ check_bytes("f60601", |cb| test(cb, mem_opnd(8, RSI, 0), uimm_opnd(1)));
+ check_bytes("f6461001", |cb| test(cb, mem_opnd(8, RSI, 16), uimm_opnd(1)));
+ check_bytes("f646f001", |cb| test(cb, mem_opnd(8, RSI, -16), uimm_opnd(1)));
+ check_bytes("854640", |cb| test(cb, mem_opnd(32, RSI, 64), EAX));
+ check_bytes("4885472a", |cb| test(cb, mem_opnd(64, RDI, 42), RAX));
+ check_bytes("4885c0", |cb| test(cb, RAX, RAX));
+ check_bytes("4885f0", |cb| test(cb, RAX, RSI));
+ check_bytes("48f74640f7ffffff", |cb| test(cb, mem_opnd(64, RSI, 64), imm_opnd(!0x08)));
+ check_bytes("48f7464008000000", |cb| test(cb, mem_opnd(64, RSI, 64), imm_opnd(0x08)));
+ check_bytes("48f7c108000000", |cb| test(cb, RCX, imm_opnd(0x08)));
+ //check_bytes("48a9f7ffff0f", |cb| test(cb, RAX, imm_opnd(0x0FFFFFF7)));
+}
+
+#[test]
+fn test_xchg() {
+ check_bytes("4891", |cb| xchg(cb, RAX, RCX));
+ check_bytes("4995", |cb| xchg(cb, RAX, R13));
+ check_bytes("4887d9", |cb| xchg(cb, RCX, RBX));
+ check_bytes("4d87f9", |cb| xchg(cb, R9, R15));
+}
+
+#[test]
+fn test_xor() {
+ check_bytes("31c0", |cb| xor(cb, EAX, EAX));
+}
+
+#[test]
+#[cfg(feature = "disasm")]
+fn basic_capstone_usage() -> std::result::Result<(), capstone::Error> {
+ // Test drive Capstone with simple input
+ extern crate capstone;
+ use capstone::prelude::*;
+ let cs = Capstone::new()
+ .x86()
+ .mode(arch::x86::ArchMode::Mode64)
+ .syntax(arch::x86::ArchSyntax::Intel)
+ .build()?;
+
+ let insns = cs.disasm_all(&[0xCC], 0x1000)?;
+
+ match insns.as_ref() {
+ [insn] => {
+ assert_eq!(Some("int3"), insn.mnemonic());
+ Ok(())
+ }
+ _ => Err(capstone::Error::CustomError(
+ "expected to disassemble to int3",
+ )),
+ }
+}
+
+#[test]
+#[cfg(feature = "asm_comments")]
+fn block_comments() {
+ let mut cb = super::CodeBlock::new_dummy(4096);
+
+ let first_write_ptr = cb.get_write_ptr().into_usize();
+ cb.add_comment("Beginning");
+ xor(&mut cb, EAX, EAX); // 2 bytes long
+ let second_write_ptr = cb.get_write_ptr().into_usize();
+ cb.add_comment("Two bytes in");
+ cb.add_comment("Still two bytes in");
+ cb.add_comment("Still two bytes in"); // Duplicate, should be ignored
+ test(&mut cb, mem_opnd(64, RSI, 64), imm_opnd(!0x08)); // 8 bytes long
+ let third_write_ptr = cb.get_write_ptr().into_usize();
+ cb.add_comment("Ten bytes in");
+
+ assert_eq!(&vec!( "Beginning".to_string() ), cb.comments_at(first_write_ptr).unwrap());
+ assert_eq!(&vec!( "Two bytes in".to_string(), "Still two bytes in".to_string() ), cb.comments_at(second_write_ptr).unwrap());
+ assert_eq!(&vec!( "Ten bytes in".to_string() ), cb.comments_at(third_write_ptr).unwrap());
+}
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
new file mode 100644
index 0000000000..5c5c221c76
--- /dev/null
+++ b/yjit/src/codegen.rs
@@ -0,0 +1,6180 @@
+use crate::asm::x86_64::*;
+use crate::asm::*;
+use crate::core::*;
+use crate::cruby::*;
+use crate::invariants::*;
+use crate::options::*;
+use crate::stats::*;
+use crate::utils::*;
+use CodegenStatus::*;
+use InsnOpnd::*;
+
+use std::cell::RefMut;
+use std::cmp;
+use std::collections::HashMap;
+use std::ffi::CStr;
+use std::mem::{self, size_of};
+use std::os::raw::c_uint;
+use std::ptr;
+use std::slice;
+
+// Callee-saved registers
+pub const REG_CFP: X86Opnd = R13;
+pub const REG_EC: X86Opnd = R12;
+pub const REG_SP: X86Opnd = RBX;
+
+// Scratch registers used by YJIT
+pub const REG0: X86Opnd = RAX;
+pub const REG0_32: X86Opnd = EAX;
+pub const REG0_8: X86Opnd = AL;
+pub const REG1: X86Opnd = RCX;
+pub const REG1_32: X86Opnd = ECX;
+
+/// Status returned by code generation functions
+#[derive(PartialEq, Debug)]
+enum CodegenStatus {
+ EndBlock,
+ KeepCompiling,
+ CantCompile,
+}
+
+/// Code generation function signature
+type InsnGenFn = fn(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus;
+
+/// Code generation state
+/// This struct only lives while code is being generated
+pub struct JITState {
+ // Block version being compiled
+ block: BlockRef,
+
+ // Instruction sequence this is associated with
+ iseq: IseqPtr,
+
+ // Index of the current instruction being compiled
+ insn_idx: u32,
+
+ // Opcode for the instruction being compiled
+ opcode: usize,
+
+ // PC of the instruction being compiled
+ pc: *mut VALUE,
+
+ // Side exit to the instruction being compiled. See :side-exit:.
+ side_exit_for_pc: Option<CodePtr>,
+
+ // Execution context when compilation started
+ // This allows us to peek at run-time values
+ ec: Option<EcPtr>,
+
+ // Whether we need to record the code address at
+ // the end of this bytecode instruction for global invalidation
+ record_boundary_patch_point: bool,
+}
+
+impl JITState {
+ pub fn new(blockref: &BlockRef) -> Self {
+ JITState {
+ block: blockref.clone(),
+ iseq: ptr::null(), // TODO: initialize this from the blockid
+ insn_idx: 0,
+ opcode: 0,
+ pc: ptr::null_mut::<VALUE>(),
+ side_exit_for_pc: None,
+ ec: None,
+ record_boundary_patch_point: false,
+ }
+ }
+
+ pub fn get_block(&self) -> BlockRef {
+ self.block.clone()
+ }
+
+ pub fn get_insn_idx(&self) -> u32 {
+ self.insn_idx
+ }
+
+ pub fn get_iseq(self: &JITState) -> IseqPtr {
+ self.iseq
+ }
+
+ pub fn get_opcode(self: &JITState) -> usize {
+ self.opcode
+ }
+
+ pub fn set_opcode(self: &mut JITState, opcode: usize) {
+ self.opcode = opcode;
+ }
+
+ pub fn add_gc_object_offset(self: &mut JITState, ptr_offset: u32) {
+ let mut gc_obj_vec: RefMut<_> = self.block.borrow_mut();
+ gc_obj_vec.add_gc_object_offset(ptr_offset);
+ }
+
+ pub fn get_pc(self: &JITState) -> *mut VALUE {
+ self.pc
+ }
+
+ pub fn set_pc(self: &mut JITState, pc: *mut VALUE) {
+ self.pc = pc;
+ }
+}
+
+use crate::codegen::JCCKinds::*;
+
+#[allow(non_camel_case_types)]
+pub enum JCCKinds {
+ JCC_JNE,
+ JCC_JNZ,
+ JCC_JZ,
+ JCC_JE,
+ JCC_JBE,
+ JCC_JNA,
+}
+
+pub fn jit_get_arg(jit: &JITState, arg_idx: isize) -> VALUE {
+    // insn_len requires non-test config
+ #[cfg(not(test))]
+ assert!(insn_len(jit.get_opcode()) > (arg_idx + 1).try_into().unwrap());
+ unsafe { *(jit.pc.offset(arg_idx + 1)) }
+}
+
+// Load a VALUE into a register and keep track of the reference if it is on the GC heap.
+pub fn jit_mov_gc_ptr(jit: &mut JITState, cb: &mut CodeBlock, reg: X86Opnd, ptr: VALUE) {
+ assert!(matches!(reg, X86Opnd::Reg(_)));
+ assert!(reg.num_bits() == 64);
+
+ // Load the pointer constant into the specified register
+ mov(cb, reg, const_ptr_opnd(ptr.as_ptr()));
+
+ // The pointer immediate is encoded as the last part of the mov written out
+ let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
+
+ if !ptr.special_const_p() {
+ jit.add_gc_object_offset(ptr_offset);
+ }
+}
+
+// Get the index of the next instruction
+fn jit_next_insn_idx(jit: &JITState) -> u32 {
+ jit.insn_idx + insn_len(jit.get_opcode())
+}
+
+// Check if we are compiling the instruction at the stub PC,
+// meaning we are compiling the instruction that is next to execute
+fn jit_at_current_insn(jit: &JITState) -> bool {
+ let ec_pc: *mut VALUE = unsafe { get_cfp_pc(get_ec_cfp(jit.ec.unwrap())) };
+ ec_pc == jit.pc
+}
+
+// Peek at the nth topmost value on the Ruby stack.
+// Returns the topmost value when n == 0.
+fn jit_peek_at_stack(jit: &JITState, ctx: &Context, n: isize) -> VALUE {
+ assert!(jit_at_current_insn(jit));
+ assert!(n < ctx.get_stack_size() as isize);
+
+ // Note: this does not account for ctx->sp_offset because
+ // this is only available when hitting a stub, and while
+ // hitting a stub, cfp->sp needs to be up to date in case
+ // codegen functions trigger GC. See :stub-sp-flush:.
+ return unsafe {
+ let sp: *mut VALUE = get_cfp_sp(get_ec_cfp(jit.ec.unwrap()));
+
+ *(sp.offset(-1 - n))
+ };
+}
+
+fn jit_peek_at_self(jit: &JITState) -> VALUE {
+ unsafe { get_cfp_self(get_ec_cfp(jit.ec.unwrap())) }
+}
+
+fn jit_peek_at_local(jit: &JITState, n: i32) -> VALUE {
+ assert!(jit_at_current_insn(jit));
+
+ let local_table_size: isize = unsafe { get_iseq_body_local_table_size(jit.iseq) }
+ .try_into()
+ .unwrap();
+ assert!(n < local_table_size.try_into().unwrap());
+
+ unsafe {
+ let ep = get_cfp_ep(get_ec_cfp(jit.ec.unwrap()));
+ let n_isize: isize = n.try_into().unwrap();
+ let offs: isize = -(VM_ENV_DATA_SIZE as isize) - local_table_size + n_isize + 1;
+ *ep.offset(offs)
+ }
+}
+
+// Add a comment at the current position in the code block
+fn add_comment(cb: &mut CodeBlock, comment_str: &str) {
+ if cfg!(feature = "asm_comments") {
+ cb.add_comment(comment_str);
+ }
+}
+
+/// Increment a profiling counter with counter_name
+#[cfg(not(feature = "stats"))]
+macro_rules! gen_counter_incr {
+ ($cb:tt, $counter_name:ident) => {};
+}
+#[cfg(feature = "stats")]
+macro_rules! gen_counter_incr {
+ ($cb:tt, $counter_name:ident) => {
+ if (get_option!(gen_stats)) {
+ // Get a pointer to the counter variable
+ let ptr = ptr_to_counter!($counter_name);
+
+ // Use REG1 because there might be return value in REG0
+ mov($cb, REG1, const_ptr_opnd(ptr as *const u8));
+ write_lock_prefix($cb); // for ractors.
+ add($cb, mem_opnd(64, REG1, 0), imm_opnd(1));
+ }
+ };
+}
+
+/// Increment a counter then take an existing side exit
+#[cfg(not(feature = "stats"))]
+macro_rules! counted_exit {
+ ($ocb:tt, $existing_side_exit:tt, $counter_name:ident) => {{
+ let _ = $ocb;
+ $existing_side_exit
+ }};
+}
+#[cfg(feature = "stats")]
+macro_rules! counted_exit {
+ ($ocb:tt, $existing_side_exit:tt, $counter_name:ident) => {
+ // The counter is only incremented when stats are enabled
+ if (!get_option!(gen_stats)) {
+ $existing_side_exit
+ } else {
+ let ocb = $ocb.unwrap();
+ let code_ptr = ocb.get_write_ptr();
+
+ // Increment the counter
+ gen_counter_incr!(ocb, $counter_name);
+
+ // Jump to the existing side exit
+ jmp_ptr(ocb, $existing_side_exit);
+
+ // Pointer to the side-exit code
+ code_ptr
+ }
+ };
+}
+
+// Save the incremented PC on the CFP
+// This is necessary when callees can raise or allocate
+fn jit_save_pc(jit: &JITState, cb: &mut CodeBlock, scratch_reg: X86Opnd) {
+ let pc: *mut VALUE = jit.get_pc();
+ let ptr: *mut VALUE = unsafe {
+ let cur_insn_len = insn_len(jit.get_opcode()) as isize;
+ pc.offset(cur_insn_len)
+ };
+ mov(cb, scratch_reg, const_ptr_opnd(ptr as *const u8));
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC), scratch_reg);
+}
+
+/// Save the current SP on the CFP
+/// This realigns the interpreter SP with the JIT SP
+/// Note: this will change the current value of REG_SP,
+/// which could invalidate memory operands
+fn gen_save_sp(cb: &mut CodeBlock, ctx: &mut Context) {
+ if ctx.get_sp_offset() != 0 {
+ let stack_pointer = ctx.sp_opnd(0);
+ lea(cb, REG_SP, stack_pointer);
+ let cfp_sp_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP);
+ mov(cb, cfp_sp_opnd, REG_SP);
+ ctx.set_sp_offset(0);
+ }
+}
+
+/// jit_save_pc() + gen_save_sp(). Should be used before calling a routine that
+/// could:
+/// - Perform GC allocation
+/// - Take the VM lock through RB_VM_LOCK_ENTER()
+/// - Perform a Ruby method call
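+///
+/// Hypothetical usage sketch (`rb_some_allocating_function` is a placeholder
+/// name, not a real binding):
+///
+/// ```ignore
+/// jit_prepare_routine_call(jit, ctx, cb, REG0);
+/// mov(cb, C_ARG_REGS[0], REG_EC);
+/// call_ptr(cb, REG0, rb_some_allocating_function as *const u8);
+/// ```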
+fn jit_prepare_routine_call(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ scratch_reg: X86Opnd,
+) {
+ jit.record_boundary_patch_point = true;
+ jit_save_pc(jit, cb, scratch_reg);
+ gen_save_sp(cb, ctx);
+
+ // In case the routine calls Ruby methods, it can set local variables
+ // through Kernel#binding and other means.
+ ctx.clear_local_types();
+}
+
+/// Record the current codeblock write position so it can later be rewritten
+/// into a jump to the outlined block. Used to implement global code invalidation.
+fn record_global_inval_patch(cb: &mut CodeBlock, outline_block_target_pos: CodePtr) {
+ CodegenGlobals::push_global_inval_patch(cb.get_write_ptr(), outline_block_target_pos);
+}
+
+/// Verify the ctx's types and mappings against the compile-time stack, self,
+/// and locals.
+fn verify_ctx(jit: &JITState, ctx: &Context) {
+ fn obj_info_str<'a>(val: VALUE) -> &'a str {
+ unsafe { CStr::from_ptr(rb_obj_info(val)).to_str().unwrap() }
+ }
+
+ // Only able to check types when at current insn
+ assert!(jit_at_current_insn(jit));
+
+ let self_val = jit_peek_at_self(jit);
+ let self_val_type = Type::from(self_val);
+
+ // Verify self operand type
+ if self_val_type.diff(ctx.get_opnd_type(SelfOpnd)) == usize::MAX {
+ panic!(
+ "verify_ctx: ctx self type ({:?}) incompatible with actual value of self {}",
+ ctx.get_opnd_type(SelfOpnd),
+ obj_info_str(self_val)
+ );
+ }
+
+ // Verify stack operand types
+ let top_idx = cmp::min(ctx.get_stack_size(), MAX_TEMP_TYPES as u16);
+ for i in 0..top_idx {
+ let (learned_mapping, learned_type) = ctx.get_opnd_mapping(StackOpnd(i));
+ let stack_val = jit_peek_at_stack(jit, ctx, i as isize);
+ let val_type = Type::from(stack_val);
+
+ match learned_mapping {
+ TempMapping::MapToSelf => {
+ if self_val != stack_val {
+ panic!(
+ "verify_ctx: stack value was mapped to self, but values did not match!\n stack: {}\n self: {}",
+ obj_info_str(stack_val),
+ obj_info_str(self_val)
+ );
+ }
+ }
+ TempMapping::MapToLocal(local_idx) => {
+ let local_val = jit_peek_at_local(jit, local_idx.into());
+ if local_val != stack_val {
+ panic!(
+ "verify_ctx: stack value was mapped to local, but values did not match\n stack: {}\n local {}: {}",
+ obj_info_str(stack_val),
+ local_idx,
+ obj_info_str(local_val)
+ );
+ }
+ }
+ TempMapping::MapToStack => {}
+ }
+
+ // If the actual type differs from the learned type
+ if val_type.diff(learned_type) == usize::MAX {
+ panic!(
+ "verify_ctx: ctx type ({:?}) incompatible with actual value on stack: {}",
+ learned_type,
+ obj_info_str(stack_val)
+ );
+ }
+ }
+
+ // Verify local variable types
+ let local_table_size = unsafe { get_iseq_body_local_table_size(jit.iseq) };
+ let top_idx: usize = cmp::min(local_table_size as usize, MAX_TEMP_TYPES);
+ for i in 0..top_idx {
+ let learned_type = ctx.get_local_type(i);
+ let local_val = jit_peek_at_local(jit, i as i32);
+ let local_type = Type::from(local_val);
+
+ if local_type.diff(learned_type) == usize::MAX {
+ panic!(
+ "verify_ctx: ctx type ({:?}) incompatible with actual value of local: {} (type {:?})",
+ learned_type,
+ obj_info_str(local_val),
+ local_type
+ );
+ }
+ }
+}
+
+/// Generate an exit to return to the interpreter
+fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, cb: &mut CodeBlock) -> CodePtr {
+ let code_ptr = cb.get_write_ptr();
+
+ add_comment(cb, "exit to interpreter");
+
+    // Generate the code to exit to the interpreter
+ // Write the adjusted SP back into the CFP
+ if ctx.get_sp_offset() != 0 {
+ let stack_pointer = ctx.sp_opnd(0);
+ lea(cb, REG_SP, stack_pointer);
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG_SP);
+ }
+
+ // Update CFP->PC
+ mov(cb, RAX, const_ptr_opnd(exit_pc as *const u8));
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC), RAX);
+
+ // Accumulate stats about interpreter exits
+ #[cfg(feature = "stats")]
+ if get_option!(gen_stats) {
+ mov(cb, RDI, const_ptr_opnd(exit_pc as *const u8));
+ call_ptr(cb, RSI, rb_yjit_count_side_exit_op as *const u8);
+ }
+
+ pop(cb, REG_SP);
+ pop(cb, REG_EC);
+ pop(cb, REG_CFP);
+
+ mov(cb, RAX, uimm_opnd(Qundef.into()));
+ ret(cb);
+
+ return code_ptr;
+}
+
+// Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit
+// to the interpreter when it cannot service a stub by generating new code.
+// Before coming here, branch_stub_hit() takes care of fully reconstructing
+// interpreter state.
+fn gen_code_for_exit_from_stub(ocb: &mut OutlinedCb) -> CodePtr {
+ let ocb = ocb.unwrap();
+ let code_ptr = ocb.get_write_ptr();
+
+ gen_counter_incr!(ocb, exit_from_branch_stub);
+
+ pop(ocb, REG_SP);
+ pop(ocb, REG_EC);
+ pop(ocb, REG_CFP);
+
+ mov(ocb, RAX, uimm_opnd(Qundef.into()));
+ ret(ocb);
+
+ return code_ptr;
+}
+
+// :side-exit:
+// Get an exit for the current instruction in the outlined block. The code
+// for each instruction often begins with several guards before proceeding
+// to do work. When guards fail, an option we have is to exit to the
+// interpreter at an instruction boundary. The piece of code that takes
+// care of reconstructing interpreter state and exiting out of generated
+// code is called the side exit.
+//
+// No guards change the logic for reconstructing interpreter state at the
+// moment, so there is one unique side exit for each context. Note that
+// it's incorrect to jump to the side exit after any ctx stack push/pop operations
+// since they change the logic required for reconstructing interpreter state.
+fn get_side_exit(jit: &mut JITState, ocb: &mut OutlinedCb, ctx: &Context) -> CodePtr {
+ match jit.side_exit_for_pc {
+ None => {
+ let exit_code = gen_exit(jit.pc, ctx, ocb.unwrap());
+ jit.side_exit_for_pc = Some(exit_code);
+ exit_code
+ }
+ Some(code_ptr) => code_ptr,
+ }
+}
+
+// Ensure that there is an exit for the start of the block being compiled.
+// Block invalidation uses this exit.
+pub fn jit_ensure_block_entry_exit(jit: &mut JITState, ocb: &mut OutlinedCb) {
+ let blockref = jit.block.clone();
+ let mut block = blockref.borrow_mut();
+ let block_ctx = block.get_ctx();
+ let blockid = block.get_blockid();
+
+ if block.entry_exit.is_some() {
+ return;
+ }
+
+ if jit.insn_idx == blockid.idx {
+ // We are compiling the first instruction in the block.
+ // Generate the exit with the cache in jitstate.
+ block.entry_exit = Some(get_side_exit(jit, ocb, &block_ctx));
+ } else {
+ let pc = unsafe { rb_iseq_pc_at_idx(blockid.iseq, blockid.idx) };
+ block.entry_exit = Some(gen_exit(pc, &block_ctx, ocb.unwrap()));
+ }
+}
+
+// Generate a runtime guard that ensures the PC is at the expected
+// instruction index in the iseq, and otherwise exits to the interpreter.
+// This handles the case of optional parameters.
+// When a function with optional parameters is called, the entry
+// PC for the method isn't necessarily 0.
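+// For example (illustrative Ruby, not from the source):
+//   def foo(a, b = some_default) ... end
+// `foo(1)` enters at the PC that computes the default for `b`, while
+// `foo(1, 2)` enters past it, so the two calls have different entry PCs.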
+fn gen_pc_guard(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) {
+ //RUBY_ASSERT(cb != NULL);
+
+ let pc_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC);
+ let expected_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) };
+ let expected_pc_opnd = const_ptr_opnd(expected_pc as *const u8);
+ mov(cb, REG0, pc_opnd);
+ mov(cb, REG1, expected_pc_opnd);
+ cmp(cb, REG0, REG1);
+
+ let pc_match = cb.new_label("pc_match".to_string());
+ je_label(cb, pc_match);
+
+ // We're not starting at the first PC, so we need to exit.
+ gen_counter_incr!(cb, leave_start_pc_non_zero);
+
+ pop(cb, REG_SP);
+ pop(cb, REG_EC);
+ pop(cb, REG_CFP);
+
+ mov(cb, RAX, imm_opnd(Qundef.into()));
+ ret(cb);
+
+ // PC should match the expected insn_idx
+ cb.write_label(pc_match);
+ cb.link_labels();
+}
+
+// Landing code for when c_return tracing is enabled. See full_cfunc_return().
+fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> CodePtr {
+ let cb = ocb.unwrap();
+ let code_ptr = cb.get_write_ptr();
+
+ // This chunk of code expects REG_EC to be filled properly and
+ // RAX to contain the return value of the C method.
+
+ // Call full_cfunc_return()
+ mov(cb, C_ARG_REGS[0], REG_EC);
+ mov(cb, C_ARG_REGS[1], RAX);
+ call_ptr(cb, REG0, rb_full_cfunc_return as *const u8);
+
+ // Count the exit
+ gen_counter_incr!(cb, traced_cfunc_return);
+
+ // Return to the interpreter
+ pop(cb, REG_SP);
+ pop(cb, REG_EC);
+ pop(cb, REG_CFP);
+
+ mov(cb, RAX, uimm_opnd(Qundef.into()));
+ ret(cb);
+
+ return code_ptr;
+}
+
+/// Generate a continuation for leave that exits to the interpreter at REG_CFP->pc.
+/// This is used by gen_leave() and gen_entry_prologue()
+fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr {
+ let ocb = ocb.unwrap();
+ let code_ptr = ocb.get_write_ptr();
+
+ // Note, gen_leave() fully reconstructs interpreter state and leaves the
+ // return value in RAX before coming here.
+
+ // Every exit to the interpreter should be counted
+ gen_counter_incr!(ocb, leave_interp_return);
+
+ pop(ocb, REG_SP);
+ pop(ocb, REG_EC);
+ pop(ocb, REG_CFP);
+
+ ret(ocb);
+
+ return code_ptr;
+}
+
+/// Compile an interpreter entry block to be inserted into an iseq
+/// Returns None if compilation fails.
+pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> Option<CodePtr> {
+ const MAX_PROLOGUE_SIZE: usize = 1024;
+
+ // Check if we have enough executable memory
+ if !cb.has_capacity(MAX_PROLOGUE_SIZE) {
+ return None;
+ }
+
+ let old_write_pos = cb.get_write_pos();
+
+ // Align the current write position to cache line boundaries
+ cb.align_pos(64);
+
+ let code_ptr = cb.get_write_ptr();
+ add_comment(cb, "yjit entry");
+
+ push(cb, REG_CFP);
+ push(cb, REG_EC);
+ push(cb, REG_SP);
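+ // Note: the registers used for CFP, EC and SP map to callee-saved registers
+ // in the C calling convention, so the caller's values are saved here and
+ // restored by every exit path (gen_exit(), gen_leave_exit(), etc.).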
+
+ // We are passed EC and CFP
+ mov(cb, REG_EC, C_ARG_REGS[0]);
+ mov(cb, REG_CFP, C_ARG_REGS[1]);
+
+ // Load the current SP from the CFP into REG_SP
+ mov(cb, REG_SP, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP));
+
+ // Setup cfp->jit_return
+ mov(
+ cb,
+ REG0,
+ code_ptr_opnd(CodegenGlobals::get_leave_exit_code()),
+ );
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_JIT_RETURN), REG0);
+
+ // We're compiling iseqs that we *expect* to start at `insn_idx`. But in
+ // the case of optional parameters, the interpreter can set the pc to a
+ // different location depending on the optional parameters. If an iseq
+ // has optional parameters, we'll add a runtime check that the PC we've
+ // compiled for is the same PC that the interpreter wants us to run with.
+ // If they don't match, then we'll take a side exit.
+ if unsafe { get_iseq_flags_has_opt(iseq) } {
+ gen_pc_guard(cb, iseq, insn_idx);
+ }
+
+ // Verify MAX_PROLOGUE_SIZE
+ assert!(cb.get_write_pos() - old_write_pos <= MAX_PROLOGUE_SIZE);
+
+ return Some(code_ptr);
+}
+
+// Generate code to check for interrupts and take a side-exit.
+// Warning: this function clobbers REG0
+fn gen_check_ints(cb: &mut CodeBlock, side_exit: CodePtr) {
+ // Check for interrupts
+ // see RUBY_VM_CHECK_INTS(ec) macro
+ add_comment(cb, "RUBY_VM_CHECK_INTS(ec)");
+ mov(
+ cb,
+ REG0_32,
+ mem_opnd(32, REG_EC, RUBY_OFFSET_EC_INTERRUPT_MASK),
+ );
+ not(cb, REG0_32);
+ test(
+ cb,
+ mem_opnd(32, REG_EC, RUBY_OFFSET_EC_INTERRUPT_FLAG),
+ REG0_32,
+ );
+ jnz_ptr(cb, side_exit);
+}
+
+// Generate a stubbed unconditional jump to the next bytecode instruction.
+// Blocks that are part of a guard chain can use this to share the same successor.
+fn jump_to_next_insn(
+ jit: &mut JITState,
+ current_context: &Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) {
+ // Reset the depth since in current usages we only ever jump to
+ // chain_depth > 0 from the same instruction.
+ let mut reset_depth = current_context.clone();
+ reset_depth.reset_chain_depth();
+
+ let jump_block = BlockId {
+ iseq: jit.iseq,
+ idx: jit_next_insn_idx(jit),
+ };
+
+ // We are at the end of the current instruction. Record the boundary.
+ if jit.record_boundary_patch_point {
+ let next_insn = unsafe { jit.pc.offset(insn_len(jit.opcode).try_into().unwrap()) };
+ let exit_pos = gen_exit(next_insn, &reset_depth, ocb.unwrap());
+ record_global_inval_patch(cb, exit_pos);
+ jit.record_boundary_patch_point = false;
+ }
+
+ // Generate the jump instruction
+ gen_direct_jump(jit, &reset_depth, jump_block, cb);
+}
+
+// Compile a sequence of bytecode instructions for a given basic block version.
+// Part of gen_block_version().
+// Note: this function will mutate its context while generating code,
+// but the input start_ctx argument should remain immutable.
+pub fn gen_single_block(
+ blockid: BlockId,
+ start_ctx: &Context,
+ ec: EcPtr,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> Result<BlockRef, ()> {
+ // Limit the number of specialized versions for this block
+ let mut ctx = limit_block_versions(blockid, start_ctx);
+
+ verify_blockid(blockid);
+ assert!(!(blockid.idx == 0 && ctx.get_stack_size() > 0));
+
+ // Instruction sequence to compile
+ let iseq = blockid.iseq;
+ let iseq_size = unsafe { get_iseq_encoded_size(iseq) };
+ let mut insn_idx: c_uint = blockid.idx;
+ let starting_insn_idx = insn_idx;
+
+ // Allocate the new block
+ let blockref = Block::new(blockid, &ctx);
+
+ // Initialize a JIT state object
+ let mut jit = JITState::new(&blockref);
+ jit.iseq = blockid.iseq;
+ jit.ec = Some(ec);
+
+ // Mark the start position of the block
+ blockref.borrow_mut().set_start_addr(cb.get_write_ptr());
+
+ // For each instruction to compile
+ // NOTE: could rewrite this loop with a std::iter::Iterator
+ while insn_idx < iseq_size {
+ // Get the current pc and opcode
+ let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) };
+ // try_into() call below is unfortunate. Maybe pick i32 instead of usize for opcodes.
+ let opcode: usize = unsafe { rb_iseq_opcode_at_pc(iseq, pc) }
+ .try_into()
+ .unwrap();
+
+ // opt_getinlinecache wants to be in a block all on its own. Cut the block short
+ // if we run into it. See gen_opt_getinlinecache() for details.
+ if opcode == OP_OPT_GETINLINECACHE && insn_idx > starting_insn_idx {
+ jump_to_next_insn(&mut jit, &ctx, cb, ocb);
+ break;
+ }
+
+ // Set the current instruction
+ jit.insn_idx = insn_idx;
+ jit.opcode = opcode;
+ jit.pc = pc;
+ jit.side_exit_for_pc = None;
+
+ // If previous instruction requested to record the boundary
+ if jit.record_boundary_patch_point {
+ // Generate an exit to this instruction and record it
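+ // (record_global_inval_patch() remembers this location so that, when a
+ // global invariant is invalidated, the code here can be patched to jump
+ // to the exit at this instruction boundary.)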
+ let exit_pos = gen_exit(jit.pc, &ctx, ocb.unwrap());
+ record_global_inval_patch(cb, exit_pos);
+ jit.record_boundary_patch_point = false;
+ }
+
+ // In debug mode, verify our existing assumption
+ #[cfg(debug_assertions)]
+ if get_option!(verify_ctx) && jit_at_current_insn(&jit) {
+ verify_ctx(&jit, &ctx);
+ }
+
+ // Lookup the codegen function for this instruction
+ let mut status = CantCompile;
+ if let Some(gen_fn) = get_gen_fn(VALUE(opcode)) {
+ // :count-placement:
+ // Count bytecode instructions that execute in generated code.
+ // Note that the increment happens even when the generated code takes a side exit.
+ gen_counter_incr!(cb, exec_instruction);
+
+ // Add a comment for the name of the YARV instruction
+ add_comment(cb, &insn_name(opcode));
+
+ // If requested, dump instructions for debugging
+ if get_option!(dump_insns) {
+ println!("compiling {}", insn_name(opcode));
+ print_str(cb, &format!("executing {}", insn_name(opcode)));
+ }
+
+ // Call the code generation function
+ status = gen_fn(&mut jit, &mut ctx, cb, ocb);
+ }
+
+ // If we can't compile this instruction
+ // exit to the interpreter and stop compiling
+ if status == CantCompile {
+ let mut block = jit.block.borrow_mut();
+
+ // TODO: if the codegen function makes changes to ctx and then returns CantCompile,
+ // the exit this generates would be wrong. We could save a copy of the entry context
+ // and assert that ctx is the same here.
+ let exit = gen_exit(jit.pc, &ctx, cb);
+
+ // If this is the first instruction in the block, then we can use
+ // the exit for block->entry_exit.
+ if insn_idx == block.get_blockid().idx {
+ block.entry_exit = Some(exit);
+ }
+
+ break;
+ }
+
+ // For now, reset the chain depth after each instruction as only the
+ // first instruction in the block can concern itself with the depth.
+ ctx.reset_chain_depth();
+
+ // Move to the next instruction to compile
+ insn_idx += insn_len(opcode);
+
+ // If the instruction terminates this block
+ if status == EndBlock {
+ break;
+ }
+ }
+
+ // Finish filling out the block
+ {
+ let mut block = jit.block.borrow_mut();
+
+ // Mark the end position of the block
+ block.set_end_addr(cb.get_write_ptr());
+
+ // Store the index of the last instruction in the block
+ block.set_end_idx(insn_idx);
+ }
+
+ // We currently can't handle cases where the request is for a block that
+ // doesn't go to the next instruction.
+ //assert!(!jit.record_boundary_patch_point);
+
+ // If code for the block doesn't fit, fail
+ if cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes() {
+ return Err(());
+ }
+
+ // TODO: we may want a feature for this called dump_insns? Can leave commented for now
+ /*
+ if (YJIT_DUMP_MODE >= 2) {
+ // Dump list of compiled instructions
+ fprintf(stderr, "Compiled the following for iseq=%p:\n", (void *)iseq);
+ for (uint32_t idx = block->blockid.idx; idx < insn_idx; ) {
+ int opcode = yjit_opcode_at_pc(iseq, yjit_iseq_pc_at_idx(iseq, idx));
+ fprintf(stderr, " %04d %s\n", idx, insn_name(opcode));
+ idx += insn_len(opcode);
+ }
+ }
+ */
+
+ // Block compiled successfully
+ Ok(blockref)
+}
+
+fn gen_nop(
+ _jit: &mut JITState,
+ _ctx: &mut Context,
+ _cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Do nothing
+ KeepCompiling
+}
+
+fn gen_pop(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ _cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Decrement SP
+ ctx.stack_pop(1);
+ KeepCompiling
+}
+
+fn gen_dup(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let dup_val = ctx.stack_pop(0);
+ let (mapping, tmp_type) = ctx.get_opnd_mapping(StackOpnd(0));
+
+ let loc0 = ctx.stack_push_mapping((mapping, tmp_type));
+ mov(cb, REG0, dup_val);
+ mov(cb, loc0, REG0);
+
+ KeepCompiling
+}
+
+// duplicate stack top n elements
+fn gen_dupn(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let nval: VALUE = jit_get_arg(jit, 0);
+ let VALUE(n) = nval;
+
+ // In practice, seems to be only used for n==2
+ if n != 2 {
+ return CantCompile;
+ }
+
+ let opnd1: X86Opnd = ctx.stack_opnd(1);
+ let opnd0: X86Opnd = ctx.stack_opnd(0);
+
+ let mapping1 = ctx.get_opnd_mapping(StackOpnd(1));
+ let mapping0 = ctx.get_opnd_mapping(StackOpnd(0));
+
+ let dst1: X86Opnd = ctx.stack_push_mapping(mapping1);
+ mov(cb, REG0, opnd1);
+ mov(cb, dst1, REG0);
+
+ let dst0: X86Opnd = ctx.stack_push_mapping(mapping0);
+ mov(cb, REG0, opnd0);
+ mov(cb, dst0, REG0);
+
+ KeepCompiling
+}
+
+// Swap top 2 stack entries
+fn gen_swap(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ stack_swap(ctx, cb, 0, 1, REG0, REG1);
+ KeepCompiling
+}
+
+fn stack_swap(
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ offset0: u16,
+ offset1: u16,
+ _reg0: X86Opnd,
+ _reg1: X86Opnd,
+) {
+ let opnd0 = ctx.stack_opnd(offset0 as i32);
+ let opnd1 = ctx.stack_opnd(offset1 as i32);
+
+ let mapping0 = ctx.get_opnd_mapping(StackOpnd(offset0));
+ let mapping1 = ctx.get_opnd_mapping(StackOpnd(offset1));
+
+ mov(cb, REG0, opnd0);
+ mov(cb, REG1, opnd1);
+ mov(cb, opnd0, REG1);
+ mov(cb, opnd1, REG0);
+
+ ctx.set_opnd_mapping(StackOpnd(offset0), mapping1);
+ ctx.set_opnd_mapping(StackOpnd(offset1), mapping0);
+}
+
+fn gen_putnil(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ jit_putobject(jit, ctx, cb, Qnil);
+ KeepCompiling
+}
+
+fn jit_putobject(jit: &mut JITState, ctx: &mut Context, cb: &mut CodeBlock, arg: VALUE) {
+ let val_type: Type = Type::from(arg);
+ let stack_top = ctx.stack_push(val_type);
+
+ if arg.special_const_p() {
+ // Immediates will not move and do not need to be tracked for GC
+ // Thanks to this we can mov directly to memory when possible.
+ let imm = imm_opnd(arg.as_i64());
+
+ // 64-bit immediates can't be directly written to memory
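+ // (x86-64 `mov m64, imm32` sign-extends a 32-bit immediate; there is no
+ // form that stores a full 64-bit immediate, so wide values go through REG0.)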
+ if imm.num_bits() <= 32 {
+ mov(cb, stack_top, imm);
+ } else {
+ mov(cb, REG0, imm);
+ mov(cb, stack_top, REG0);
+ }
+ } else {
+ // Load the value to push into REG0
+ // Note that this value may get moved by the GC
+ jit_mov_gc_ptr(jit, cb, REG0, arg);
+
+ // Write argument at SP
+ mov(cb, stack_top, REG0);
+ }
+}
+
+fn gen_putobject_int2fix(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let opcode = jit.opcode;
+ let cst_val: usize = if opcode == OP_PUTOBJECT_INT2FIX_0_ {
+ 0
+ } else {
+ 1
+ };
+
+ jit_putobject(jit, ctx, cb, VALUE::fixnum_from_usize(cst_val));
+ KeepCompiling
+}
+
+fn gen_putobject(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let arg: VALUE = jit_get_arg(jit, 0);
+
+ jit_putobject(jit, ctx, cb, arg);
+ KeepCompiling
+}
+
+fn gen_putself(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Load self from CFP
+ let cf_opnd = mem_opnd((8 * SIZEOF_VALUE) as u8, REG_CFP, RUBY_OFFSET_CFP_SELF);
+ mov(cb, REG0, cf_opnd);
+
+ // Write it on the stack
+ let stack_top: X86Opnd = ctx.stack_push_self();
+ mov(cb, stack_top, REG0);
+
+ KeepCompiling
+}
+
+fn gen_putspecialobject(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let object_type = jit_get_arg(jit, 0);
+
+ if object_type == VALUE(VM_SPECIAL_OBJECT_VMCORE) {
+ let stack_top: X86Opnd = ctx.stack_push(Type::UnknownHeap);
+ jit_mov_gc_ptr(jit, cb, REG0, unsafe { rb_mRubyVMFrozenCore });
+ mov(cb, stack_top, REG0);
+ KeepCompiling
+ } else {
+ // TODO: implement for VM_SPECIAL_OBJECT_CBASE and
+ // VM_SPECIAL_OBJECT_CONST_BASE
+ CantCompile
+ }
+}
+
+// set Nth stack entry to stack top
+fn gen_setn(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let nval: VALUE = jit_get_arg(jit, 0);
+ let VALUE(n) = nval;
+
+ let top_val: X86Opnd = ctx.stack_pop(0);
+ let dst_opnd: X86Opnd = ctx.stack_opnd(n.try_into().unwrap());
+ mov(cb, REG0, top_val);
+ mov(cb, dst_opnd, REG0);
+
+ let mapping = ctx.get_opnd_mapping(StackOpnd(0));
+ ctx.set_opnd_mapping(StackOpnd(n.try_into().unwrap()), mapping);
+
+ KeepCompiling
+}
+
+// get nth stack value, then push it
+fn gen_topn(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let nval: VALUE = jit_get_arg(jit, 0);
+ let VALUE(n) = nval;
+
+ let top_n_val = ctx.stack_opnd(n.try_into().unwrap());
+ let mapping = ctx.get_opnd_mapping(StackOpnd(n.try_into().unwrap()));
+
+ let loc0 = ctx.stack_push_mapping(mapping);
+ mov(cb, REG0, top_n_val);
+ mov(cb, loc0, REG0);
+
+ KeepCompiling
+}
+
+// Pop n values off the stack
+fn gen_adjuststack(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ _cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let nval: VALUE = jit_get_arg(jit, 0);
+ let VALUE(n) = nval;
+
+ ctx.stack_pop(n);
+ KeepCompiling
+}
+
+fn gen_opt_plus(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let comptime_a = jit_peek_at_stack(jit, ctx, 1);
+ let comptime_b = jit_peek_at_stack(jit, ctx, 0);
+
+ if comptime_a.fixnum_p() && comptime_b.fixnum_p() {
+ // Create a side-exit to fall back to the interpreter
+ // Note: we generate the side-exit before popping operands from the stack
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS) {
+ return CantCompile;
+ }
+
+ // Check that both operands are fixnums
+ guard_two_fixnums(ctx, cb, side_exit);
+
+ // Get the operands and destination from the stack
+ let arg1 = ctx.stack_pop(1);
+ let arg0 = ctx.stack_pop(1);
+
+ // Add arg0 + arg1 and test for overflow
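+ // Fixnums are tagged as (n << 1) | 1, so (2a+1) + (2b+1) - 1 == 2(a+b) + 1:
+ // subtracting one tag bit before adding yields the tagged sum directly.
+ // For example, 2 and 3 are encoded as 5 and 7: 5 - 1 + 7 == 11, which encodes 5.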
+ mov(cb, REG0, arg0);
+ sub(cb, REG0, imm_opnd(1));
+ add(cb, REG0, arg1);
+ jo_ptr(cb, side_exit);
+
+ // Push the output on the stack
+ let dst = ctx.stack_push(Type::Fixnum);
+ mov(cb, dst, REG0);
+
+ KeepCompiling
+ } else {
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+ }
+}
+
+// new array initialized from top N values
+fn gen_newarray(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let n = jit_get_arg(jit, 0).as_u32();
+
+ // Save the PC and SP because we are allocating
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ let offset_magnitude = SIZEOF_VALUE as u32 * n;
+ let values_ptr = ctx.sp_opnd(-(offset_magnitude as isize));
+
+ // call rb_ec_ary_new_from_values(struct rb_execution_context_struct *ec, long n, const VALUE *elts);
+ mov(cb, C_ARG_REGS[0], REG_EC);
+ mov(cb, C_ARG_REGS[1], imm_opnd(n.into()));
+ lea(cb, C_ARG_REGS[2], values_ptr);
+ call_ptr(cb, REG0, rb_ec_ary_new_from_values as *const u8);
+
+ ctx.stack_pop(n.as_usize());
+ let stack_ret = ctx.stack_push(Type::Array);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+// dup array
+fn gen_duparray(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let ary = jit_get_arg(jit, 0);
+
+ // Save the PC and SP because we are allocating
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // call rb_ary_resurrect(VALUE ary);
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], ary);
+ call_ptr(cb, REG0, rb_ary_resurrect as *const u8);
+
+ let stack_ret = ctx.stack_push(Type::Array);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+// dup hash
+fn gen_duphash(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let hash = jit_get_arg(jit, 0);
+
+ // Save the PC and SP because we are allocating
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // call rb_hash_resurrect(VALUE hash);
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], hash);
+ call_ptr(cb, REG0, rb_hash_resurrect as *const u8);
+
+ let stack_ret = ctx.stack_push(Type::Hash);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+// call to_a on the array on the stack
+fn gen_splatarray(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let flag = jit_get_arg(jit, 0);
+
+ // Save the PC and SP because the callee may allocate
+ // Note that this modifies REG_SP, which is why we do it first
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // Get the operands from the stack
+ let ary_opnd = ctx.stack_pop(1);
+
+ // Call rb_vm_splat_array(flag, ary)
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], flag);
+ mov(cb, C_ARG_REGS[1], ary_opnd);
+ call_ptr(cb, REG1, rb_vm_splat_array as *const u8);
+
+ let stack_ret = ctx.stack_push(Type::Array);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+// new range initialized from top 2 values
+fn gen_newrange(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let flag = jit_get_arg(jit, 0);
+
+ // rb_range_new() allocates and can raise
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // val = rb_range_new(low, high, (int)flag);
+ mov(cb, C_ARG_REGS[0], ctx.stack_opnd(1));
+ mov(cb, C_ARG_REGS[1], ctx.stack_opnd(0));
+ mov(cb, C_ARG_REGS[2], uimm_opnd(flag.into()));
+ call_ptr(cb, REG0, rb_range_new as *const u8);
+
+ ctx.stack_pop(2);
+ let stack_ret = ctx.stack_push(Type::UnknownHeap);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+fn guard_object_is_heap(
+ cb: &mut CodeBlock,
+ object_opnd: X86Opnd,
+ _ctx: &mut Context,
+ side_exit: CodePtr,
+) {
+ add_comment(cb, "guard object is heap");
+
+ // Test that the object is not an immediate
+ test(cb, object_opnd, uimm_opnd(RUBY_IMMEDIATE_MASK as u64));
+ jnz_ptr(cb, side_exit);
+
+ // Test that the object is not false or nil
+ cmp(cb, object_opnd, uimm_opnd(Qnil.into()));
+ jbe_ptr(cb, side_exit);
+}
+
+fn guard_object_is_array(
+ cb: &mut CodeBlock,
+ object_opnd: X86Opnd,
+ flags_opnd: X86Opnd,
+ _ctx: &mut Context,
+ side_exit: CodePtr,
+) {
+ add_comment(cb, "guard object is array");
+
+ // Pull out the type mask
+ mov(
+ cb,
+ flags_opnd,
+ mem_opnd(
+ 8 * SIZEOF_VALUE as u8,
+ object_opnd,
+ RUBY_OFFSET_RBASIC_FLAGS,
+ ),
+ );
+ and(cb, flags_opnd, uimm_opnd(RUBY_T_MASK as u64));
+
+ // Compare the result with T_ARRAY
+ cmp(cb, flags_opnd, uimm_opnd(RUBY_T_ARRAY as u64));
+ jne_ptr(cb, side_exit);
+}
+
+// push enough nils onto the stack to fill out an array
+fn gen_expandarray(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let flag = jit_get_arg(jit, 1);
+ let VALUE(flag_value) = flag;
+
+ // If this instruction has the splat flag, then bail out.
+ if flag_value & 0x01 != 0 {
+ incr_counter!(expandarray_splat);
+ return CantCompile;
+ }
+
+ // If this instruction has the postarg flag, then bail out.
+ if flag_value & 0x02 != 0 {
+ incr_counter!(expandarray_postarg);
+ return CantCompile;
+ }
+
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // num is the number of requested values. If there aren't enough in the
+ // array then we're going to push on nils.
+ let num = jit_get_arg(jit, 0);
+ let array_type = ctx.get_opnd_type(StackOpnd(0));
+ let array_opnd = ctx.stack_pop(1);
+
+ if matches!(array_type, Type::Nil) {
+ // special case for a, b = nil pattern
+ // push N nils onto the stack
+ for _i in 0..(num.into()) {
+ let push_opnd = ctx.stack_push(Type::Nil);
+ mov(cb, push_opnd, uimm_opnd(Qnil.into()));
+ }
+ return KeepCompiling;
+ }
+
+ // Move the array from the stack into REG0 and check that it's an array.
+ mov(cb, REG0, array_opnd);
+ guard_object_is_heap(
+ cb,
+ REG0,
+ ctx,
+ counted_exit!(ocb, side_exit, expandarray_not_array),
+ );
+ guard_object_is_array(
+ cb,
+ REG0,
+ REG1,
+ ctx,
+ counted_exit!(ocb, side_exit, expandarray_not_array),
+ );
+
+ // If we don't actually want any values, then just return.
+ if num == VALUE(0) {
+ return KeepCompiling;
+ }
+
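+ // Arrays have two layouts: embedded arrays store their elements inline
+ // (as.ary) with the length packed into the RBasic flags, while heap arrays
+ // keep a separate pointer and length (as.heap). The test + cmovz pairs below
+ // select the right length and element pointer without branching.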
+ // Pull out the embed flag to check if it's an embedded array.
+ let flags_opnd = mem_opnd((8 * SIZEOF_VALUE) as u8, REG0, RUBY_OFFSET_RBASIC_FLAGS);
+ mov(cb, REG1, flags_opnd);
+
+ // Move the length of the embedded array into REG1.
+ and(cb, REG1, uimm_opnd(RARRAY_EMBED_LEN_MASK as u64));
+ shr(cb, REG1, uimm_opnd(RARRAY_EMBED_LEN_SHIFT as u64));
+
+ // Conditionally move the length of the heap array into REG1.
+ test(cb, flags_opnd, uimm_opnd(RARRAY_EMBED_FLAG as u64));
+ let array_len_opnd = mem_opnd(
+ (8 * size_of::<std::os::raw::c_long>()) as u8,
+ REG0,
+ RUBY_OFFSET_RARRAY_AS_HEAP_LEN,
+ );
+ cmovz(cb, REG1, array_len_opnd);
+
+ // Only handle the case where the number of values in the array is greater
+ // than or equal to the number of values requested.
+ cmp(cb, REG1, uimm_opnd(num.into()));
+ jl_ptr(cb, counted_exit!(ocb, side_exit, expandarray_rhs_too_small));
+
+ // Load the address of the embedded array into REG1.
+ // (struct RArray *)(obj)->as.ary
+ let ary_opnd = mem_opnd((8 * SIZEOF_VALUE) as u8, REG0, RUBY_OFFSET_RARRAY_AS_ARY);
+ lea(cb, REG1, ary_opnd);
+
+ // Conditionally load the address of the heap array into REG1.
+ // (struct RArray *)(obj)->as.heap.ptr
+ test(cb, flags_opnd, uimm_opnd(RARRAY_EMBED_FLAG as u64));
+ let heap_ptr_opnd = mem_opnd(
+ (8 * size_of::<usize>()) as u8,
+ REG0,
+ RUBY_OFFSET_RARRAY_AS_HEAP_PTR,
+ );
+ cmovz(cb, REG1, heap_ptr_opnd);
+
+ // Loop backward through the array and push each element onto the stack.
+ for i in (0..(num.as_i32())).rev() {
+ let top = ctx.stack_push(Type::Unknown);
+ mov(cb, REG0, mem_opnd(64, REG1, i * (SIZEOF_VALUE as i32)));
+ mov(cb, top, REG0);
+ }
+
+ KeepCompiling
+}
+
+fn gen_getlocal_wc0(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Compute the offset from BP to the local
+ let slot_idx = jit_get_arg(jit, 0).as_i32();
+ let offs: i32 = -(SIZEOF_VALUE as i32) * slot_idx;
+ let local_idx = slot_to_local_idx(jit.get_iseq(), slot_idx);
+
+ // Load environment pointer EP (level 0) from CFP
+ gen_get_ep(cb, REG0, 0);
+
+ // Load the local from the EP
+ mov(cb, REG0, mem_opnd(64, REG0, offs));
+
+ // Write the local at SP
+ let stack_top = ctx.stack_push_local(local_idx.as_usize());
+ mov(cb, stack_top, REG0);
+
+ KeepCompiling
+}
+
+// Compute the index of a local variable from its slot index
+fn slot_to_local_idx(iseq: IseqPtr, slot_idx: i32) -> u32 {
+ // Layout illustration
+ // This is an array of VALUE
+ // | VM_ENV_DATA_SIZE |
+ // v v
+ // low addr <+-------+-------+-------+-------+------------------+
+ // |local 0|local 1| ... |local n| .... |
+ // +-------+-------+-------+-------+------------------+
+ // ^ ^ ^ ^
+ // +-------+---local_table_size----+ cfp->ep--+
+ // | |
+ // +------------------slot_idx----------------+
+ //
+ // See usages of local_var_name() from iseq.c for similar calculation.
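+ //
+ // Worked example (assuming VM_ENV_DATA_SIZE == 3, as in current CRuby):
+ // with local_table_size == 2, slot_idx 4 maps to local 0 and slot_idx 3
+ // maps to local 1, matching the math below.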
+
+ // Equivalent of iseq->body->local_table_size
+ let local_table_size: i32 = unsafe { get_iseq_body_local_table_size(iseq) }
+ .try_into()
+ .unwrap();
+ let op = slot_idx - (VM_ENV_DATA_SIZE as i32);
+ let local_idx = local_table_size - op - 1;
+ assert!(local_idx >= 0 && local_idx < local_table_size);
+ local_idx.try_into().unwrap()
+}
+
+// Get EP at level from CFP
+fn gen_get_ep(cb: &mut CodeBlock, reg: X86Opnd, level: u32) {
+ // Load environment pointer EP from CFP
+ let ep_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP);
+ mov(cb, reg, ep_opnd);
+
+ for _ in 0..level {
+ // Get the previous EP from the current EP
+ // See GET_PREV_EP(ep) macro
+ // VALUE *prev_ep = ((VALUE *)((ep)[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03))
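+ // The previous EP stored in specval is tagged in its low bits
+ // (see VM_GUARDED_PREV_EP), so they are masked off to recover the pointer.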
+ let offs = (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32);
+ mov(cb, reg, mem_opnd(64, reg, offs));
+ and(cb, reg, imm_opnd(!0x03));
+ }
+}
+
+fn gen_getlocal_generic(
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ local_idx: u32,
+ level: u32,
+) -> CodegenStatus {
+ gen_get_ep(cb, REG0, level);
+
+ // Load the local from the block
+ // val = *(vm_get_ep(GET_EP(), level) - idx);
+ let offs = -(SIZEOF_VALUE as i32 * local_idx as i32);
+ mov(cb, REG0, mem_opnd(64, REG0, offs));
+
+ // Write the local at SP
+ let stack_top = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_top, REG0);
+
+ KeepCompiling
+}
+
+fn gen_getlocal(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let idx = jit_get_arg(jit, 0);
+ let level = jit_get_arg(jit, 1);
+ gen_getlocal_generic(ctx, cb, idx.as_u32(), level.as_u32())
+}
+
+fn gen_getlocal_wc1(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let idx = jit_get_arg(jit, 0);
+ gen_getlocal_generic(ctx, cb, idx.as_u32(), 1)
+}
+
+fn gen_setlocal_wc0(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ /*
+ vm_env_write(const VALUE *ep, int index, VALUE v)
+ {
+ VALUE flags = ep[VM_ENV_DATA_INDEX_FLAGS];
+ if (LIKELY((flags & VM_ENV_FLAG_WB_REQUIRED) == 0)) {
+ VM_STACK_ENV_WRITE(ep, index, v);
+ }
+ else {
+ vm_env_write_slowpath(ep, index, v);
+ }
+ }
+ */
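+ //
+ // Rather than calling vm_env_write_slowpath() (which runs the write barrier),
+ // the code below side-exits when VM_ENV_FLAG_WB_REQUIRED is set and lets the
+ // interpreter perform the write.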
+
+ let slot_idx = jit_get_arg(jit, 0).as_i32();
+ let local_idx = slot_to_local_idx(jit.get_iseq(), slot_idx).as_usize();
+
+ // Load environment pointer EP (level 0) from CFP
+ gen_get_ep(cb, REG0, 0);
+
+ // flags & VM_ENV_FLAG_WB_REQUIRED
+ let flags_opnd = mem_opnd(
+ 64,
+ REG0,
+ SIZEOF_VALUE as i32 * VM_ENV_DATA_INDEX_FLAGS as i32,
+ );
+ test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED as i64));
+
+ // Create a side-exit to fall back to the interpreter
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
+ jnz_ptr(cb, side_exit);
+
+ // Set the type of the local variable in the context
+ let temp_type = ctx.get_opnd_type(StackOpnd(0));
+ ctx.set_local_type(local_idx, temp_type);
+
+ // Pop the value to write from the stack
+ let stack_top = ctx.stack_pop(1);
+ mov(cb, REG1, stack_top);
+
+ // Write the value at the environment pointer
+ let offs: i32 = -8 * slot_idx;
+ mov(cb, mem_opnd(64, REG0, offs), REG1);
+
+ KeepCompiling
+}
+
+fn gen_setlocal_generic(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ local_idx: i32,
+ level: u32,
+) -> CodegenStatus {
+ // Load environment pointer EP at level
+ gen_get_ep(cb, REG0, level);
+
+ // flags & VM_ENV_FLAG_WB_REQUIRED
+ let flags_opnd = mem_opnd(
+ 64,
+ REG0,
+ SIZEOF_VALUE as i32 * VM_ENV_DATA_INDEX_FLAGS as i32,
+ );
+ test(cb, flags_opnd, uimm_opnd(VM_ENV_FLAG_WB_REQUIRED.into()));
+
+ // Create a side-exit to fall back to the interpreter
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
+ jnz_ptr(cb, side_exit);
+
+ // Pop the value to write from the stack
+ let stack_top = ctx.stack_pop(1);
+ mov(cb, REG1, stack_top);
+
+ // Write the value at the environment pointer
+ let offs = -(SIZEOF_VALUE as i32 * local_idx);
+ mov(cb, mem_opnd(64, REG0, offs), REG1);
+
+ KeepCompiling
+}
+
+fn gen_setlocal(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let idx = jit_get_arg(jit, 0).as_i32();
+ let level = jit_get_arg(jit, 1).as_u32();
+ gen_setlocal_generic(jit, ctx, cb, ocb, idx, level)
+}
+
+fn gen_setlocal_wc1(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let idx = jit_get_arg(jit, 0).as_i32();
+ gen_setlocal_generic(jit, ctx, cb, ocb, idx, 1)
+}
+
+// new hash initialized from top N values
+fn gen_newhash(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let num: i64 = jit_get_arg(jit, 0).as_i64();
+
+ // Save the PC and SP because we are allocating
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ if num != 0 {
+ // val = rb_hash_new_with_size(num / 2);
+ mov(cb, C_ARG_REGS[0], imm_opnd(num / 2));
+ call_ptr(cb, REG0, rb_hash_new_with_size as *const u8);
+
+ // save the allocated hash as we want to push it after insertion
+ push(cb, RAX);
+ push(cb, RAX); // alignment
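+ // (RAX is pushed twice so RSP stays 16-byte aligned across the upcoming
+ // C call, as required by the System V AMD64 calling convention; only one
+ // copy of the hash is actually needed.)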
+
+ // rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val);
+ mov(cb, C_ARG_REGS[0], imm_opnd(num));
+ lea(
+ cb,
+ C_ARG_REGS[1],
+ ctx.stack_opnd((num - 1).try_into().unwrap()),
+ );
+ mov(cb, C_ARG_REGS[2], RAX);
+ call_ptr(cb, REG0, rb_hash_bulk_insert as *const u8);
+
+ pop(cb, RAX); // alignment
+ pop(cb, RAX);
+
+ ctx.stack_pop(num.try_into().unwrap());
+ let stack_ret = ctx.stack_push(Type::Hash);
+ mov(cb, stack_ret, RAX);
+ } else {
+ // val = rb_hash_new();
+ call_ptr(cb, REG0, rb_hash_new as *const u8);
+
+ let stack_ret = ctx.stack_push(Type::Hash);
+ mov(cb, stack_ret, RAX);
+ }
+
+ KeepCompiling
+}
+
+fn gen_putstring(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let put_val = jit_get_arg(jit, 0);
+
+ // Save the PC and SP because the callee will allocate
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ mov(cb, C_ARG_REGS[0], REG_EC);
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], put_val);
+ call_ptr(cb, REG0, rb_ec_str_resurrect as *const u8);
+
+ let stack_top = ctx.stack_push(Type::String);
+ mov(cb, stack_top, RAX);
+
+ KeepCompiling
+}
+
+// Push Qtrue or Qfalse depending on whether the given keyword was supplied by
+// the caller
+fn gen_checkkeyword(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // When a keyword is unspecified past index 32, a hash will be used
+ // instead. This can only happen in iseqs taking more than 32 keywords.
+ if unsafe { (*get_iseq_body_param_keyword(jit.iseq)).num >= 32 } {
+ return CantCompile;
+ }
+
+ // The EP offset to the undefined bits local
+ let bits_offset = jit_get_arg(jit, 0).as_i32();
+
+ // The index of the keyword we want to check
+ let index: i64 = jit_get_arg(jit, 1).as_i64();
+
+ // Load environment pointer EP
+ gen_get_ep(cb, REG0, 0);
+
+ // VALUE kw_bits = *(ep - bits);
+ let bits_opnd = mem_opnd(64, REG0, (SIZEOF_VALUE as i32) * -bits_offset);
+
+ // unsigned int b = (unsigned int)FIX2ULONG(kw_bits);
+ // if ((b & (0x01 << idx))) {
+ //
+ // We can skip the FIX2ULONG conversion by shifting the bit we test
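+ // (kw_bits is a tagged Fixnum, i.e. (bits << 1) | 1, so logical bit `idx`
+ // of the bitmap lives at bit `idx + 1` of the raw VALUE.)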
+ let bit_test: i64 = 0x01 << (index + 1);
+ test(cb, bits_opnd, imm_opnd(bit_test));
+ mov(cb, REG0, uimm_opnd(Qfalse.into()));
+ mov(cb, REG1, uimm_opnd(Qtrue.into()));
+ cmovz(cb, REG0, REG1);
+
+ let stack_ret = ctx.stack_push(Type::UnknownImm);
+ mov(cb, stack_ret, REG0);
+
+ KeepCompiling
+}
+
+fn gen_jnz_to_target0(
+ cb: &mut CodeBlock,
+ target0: CodePtr,
+ _target1: Option<CodePtr>,
+ shape: BranchShape,
+) {
+ match shape {
+ BranchShape::Next0 | BranchShape::Next1 => unreachable!(),
+ BranchShape::Default => jnz_ptr(cb, target0),
+ }
+}
+
+fn gen_jz_to_target0(
+ cb: &mut CodeBlock,
+ target0: CodePtr,
+ _target1: Option<CodePtr>,
+ shape: BranchShape,
+) {
+ match shape {
+ BranchShape::Next0 | BranchShape::Next1 => unreachable!(),
+ BranchShape::Default => jz_ptr(cb, target0),
+ }
+}
+
+fn gen_jbe_to_target0(
+ cb: &mut CodeBlock,
+ target0: CodePtr,
+ _target1: Option<CodePtr>,
+ shape: BranchShape,
+) {
+ match shape {
+ BranchShape::Next0 | BranchShape::Next1 => unreachable!(),
+ BranchShape::Default => jbe_ptr(cb, target0),
+ }
+}
+
+// Generate a jump to a stub that recompiles the current YARV instruction on failure.
+// When depth_limit is exceeded, generate a jump to a side exit.
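+// Each guard failure branches to a stub that compiles another version of the
+// current instruction with chain_depth incremented in its context, so up to
+// depth_limit progressively specialized versions can be generated before
+// giving up and jumping to the side exit.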
+fn jit_chain_guard(
+ jcc: JCCKinds,
+ jit: &JITState,
+ ctx: &Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ depth_limit: i32,
+ side_exit: CodePtr,
+) {
+ let target0_gen_fn = match jcc {
+ JCC_JNE | JCC_JNZ => gen_jnz_to_target0,
+ JCC_JZ | JCC_JE => gen_jz_to_target0,
+ JCC_JBE | JCC_JNA => gen_jbe_to_target0,
+ };
+
+ if (ctx.get_chain_depth() as i32) < depth_limit {
+ let mut deeper = ctx.clone();
+ deeper.increment_chain_depth();
+ let bid = BlockId {
+ iseq: jit.iseq,
+ idx: jit.insn_idx,
+ };
+
+ gen_branch(jit, ctx, cb, ocb, bid, &deeper, None, None, target0_gen_fn);
+ } else {
+ target0_gen_fn(cb, side_exit, None, BranchShape::Default);
+ }
+}
+
+// up to 5 different classes, and embedded or not for each
+pub const GET_IVAR_MAX_DEPTH: i32 = 10;
+
+// hashes and arrays
+pub const OPT_AREF_MAX_CHAIN_DEPTH: i32 = 2;
+
+// up to 5 different classes
+pub const SEND_MAX_DEPTH: i32 = 5;
+
+// Codegen for setting an instance variable.
+// Preconditions:
+// - receiver is in REG0
+// - receiver has the same class as CLASS_OF(comptime_receiver)
+// - no stack pushes or pops on ctx since entering the codegen of the instruction being compiled
+fn gen_set_ivar(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ recv: VALUE,
+ ivar_name: ID,
+) -> CodegenStatus {
+ // Save the PC and SP because the callee may allocate
+ // Note that this modifies REG_SP, which is why we do it first
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // Get the operands from the stack
+ let val_opnd = ctx.stack_pop(1);
+ let recv_opnd = ctx.stack_pop(1);
+
+ let ivar_index: u32 = unsafe { rb_obj_ensure_iv_index_mapping(recv, ivar_name) };
+
+ // Call rb_vm_set_ivar_idx with the receiver, the index of the ivar, and the value
+ mov(cb, C_ARG_REGS[0], recv_opnd);
+ mov(cb, C_ARG_REGS[1], imm_opnd(ivar_index.into()));
+ mov(cb, C_ARG_REGS[2], val_opnd);
+ call_ptr(cb, REG0, rb_vm_set_ivar_idx as *const u8);
+
+ let out_opnd = ctx.stack_push(Type::Unknown);
+ mov(cb, out_opnd, RAX);
+
+ KeepCompiling
+}
+
+// Codegen for getting an instance variable.
+// Preconditions:
+// - receiver is in REG0
+// - receiver has the same class as CLASS_OF(comptime_receiver)
+// - no stack pushes or pops on ctx since entering the codegen of the instruction being compiled
+fn gen_get_ivar(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ max_chain_depth: i32,
+ comptime_receiver: VALUE,
+ ivar_name: ID,
+ reg0_opnd: InsnOpnd,
+ side_exit: CodePtr,
+) -> CodegenStatus {
+ let comptime_val_klass = comptime_receiver.class_of();
+ let starting_context = ctx.clone(); // make a copy for use with jit_chain_guard
+
+ // Check if the comptime class uses a custom allocator
+ let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) };
+ let uses_custom_allocator = match custom_allocator {
+ Some(alloc_fun) => {
+ let allocate_instance = rb_class_allocate_instance as *const u8;
+ alloc_fun as *const u8 != allocate_instance
+ }
+ None => false,
+ };
+
+ // Check if the comptime receiver is a T_OBJECT
+ let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) };
+
+ // If the class uses the default allocator, instances should all be T_OBJECT
+ // NOTE: This assumes nobody changes the allocator of the class after allocation.
+ // Eventually, we can encode whether an object is T_OBJECT or not
+ // inside object shapes.
+ if !receiver_t_object || uses_custom_allocator {
+ // General case. Call rb_ivar_get().
+ // VALUE rb_ivar_get(VALUE obj, ID id)
+ add_comment(cb, "call rb_ivar_get()");
+
+ // The function could raise exceptions.
+ jit_prepare_routine_call(jit, ctx, cb, REG1);
+
+ mov(cb, C_ARG_REGS[0], REG0);
+ mov(cb, C_ARG_REGS[1], uimm_opnd(ivar_name));
+ call_ptr(cb, REG1, rb_ivar_get as *const u8);
+
+ if reg0_opnd != SelfOpnd {
+ ctx.stack_pop(1);
+ }
+ // Push the ivar on the stack
+ let out_opnd = ctx.stack_push(Type::Unknown);
+ mov(cb, out_opnd, RAX);
+
+ // Jump to next instruction. This allows guard chains to share the same successor.
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ /*
+ // FIXME:
+ // This check was added because of a failure in a test involving the
+ // Nokogiri Document class where we see a T_DATA that still has the default
+ // allocator.
+ // Aaron Patterson argues that this is a bug in the C extension, because
+ // people could call .allocate() on the class and still get a T_OBJECT
+ // For now I added an extra dynamic check that the receiver is T_OBJECT
+ // so we can safely pass all the tests in Shopify Core.
+ //
+ // Guard that the receiver is T_OBJECT
+ // #define RB_BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & RUBY_T_MASK)
+ add_comment(cb, "guard receiver is T_OBJECT");
+ mov(cb, REG1, member_opnd(REG0, struct RBasic, flags));
+ and(cb, REG1, imm_opnd(RUBY_T_MASK));
+ cmp(cb, REG1, imm_opnd(T_OBJECT));
+ jit_chain_guard(JCC_JNE, jit, &starting_context, cb, ocb, max_chain_depth, side_exit);
+ */
+
+ // FIXME: Mapping the index could fail when there are too many ivar names. If we're
+ // compiling for a branch stub, that can cause the exception to be raised from the
+ // wrong PC.
+ let ivar_index =
+ unsafe { rb_obj_ensure_iv_index_mapping(comptime_receiver, ivar_name) }.as_usize();
+
+ // Pop receiver if it's on the temp stack
+ if reg0_opnd != SelfOpnd {
+ ctx.stack_pop(1);
+ }
+
+ // Compile-time receiver is embedded and the ivar index lands within the object
+ let test_result = unsafe { FL_TEST_RAW(comptime_receiver, VALUE(ROBJECT_EMBED)) != VALUE(0) };
+ if test_result && ivar_index < ROBJECT_EMBED_LEN_MAX {
+ // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
+
+ // Guard that self is embedded
+ // TODO: BT and JC would be shorter
+ add_comment(cb, "guard embedded getivar");
+ let flags_opnd = mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS);
+ test(cb, flags_opnd, uimm_opnd(ROBJECT_EMBED as u64));
+ let side_exit = counted_exit!(ocb, side_exit, getivar_megamorphic);
+ jit_chain_guard(
+ JCC_JZ,
+ jit,
+ &starting_context,
+ cb,
+ ocb,
+ max_chain_depth,
+ side_exit,
+ );
+
+ // Load the variable
+ let offs = RUBY_OFFSET_ROBJECT_AS_ARY + (ivar_index * SIZEOF_VALUE) as i32;
+ let ivar_opnd = mem_opnd(64, REG0, offs);
+ mov(cb, REG1, ivar_opnd);
+
+ // If the ivar is Qundef (i.e. unset), replace it with Qnil so Ruby sees nil
+ cmp(cb, REG1, uimm_opnd(Qundef.into()));
+ mov(cb, REG0, uimm_opnd(Qnil.into()));
+ cmove(cb, REG1, REG0);
+
+ // Push the ivar on the stack
+ let out_opnd = ctx.stack_push(Type::Unknown);
+ mov(cb, out_opnd, REG1);
+ } else {
+ // Compile-time value is *not* embedded.
+
+ // Guard that value is *not* embedded
+ // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
+ add_comment(cb, "guard extended getivar");
+ let flags_opnd = mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS);
+ test(cb, flags_opnd, uimm_opnd(ROBJECT_EMBED as u64));
+ let side_exit = counted_exit!(ocb, side_exit, getivar_megamorphic);
+ jit_chain_guard(
+ JCC_JNZ,
+ jit,
+ &starting_context,
+ cb,
+ ocb,
+ max_chain_depth,
+ side_exit,
+ );
+
+ // Check that the extended table is big enough
+ if ivar_index >= ROBJECT_EMBED_LEN_MAX + 1 {
+ // Check that the slot is inside the extended table (num_slots > index)
+ let num_slots = mem_opnd(32, REG0, RUBY_OFFSET_ROBJECT_AS_HEAP_NUMIV);
+
+ cmp(cb, num_slots, uimm_opnd(ivar_index as u64));
+ jle_ptr(cb, counted_exit!(ocb, side_exit, getivar_idx_out_of_range));
+ }
+
+ // Get a pointer to the extended table
+ let tbl_opnd = mem_opnd(64, REG0, RUBY_OFFSET_ROBJECT_AS_HEAP_IVPTR);
+ mov(cb, REG0, tbl_opnd);
+
+ // Read the ivar from the extended table
+ let ivar_opnd = mem_opnd(64, REG0, (SIZEOF_VALUE * ivar_index) as i32);
+ mov(cb, REG0, ivar_opnd);
+
+ // If the ivar is Qundef (i.e. unset), replace it with Qnil
+ cmp(cb, REG0, uimm_opnd(Qundef.into()));
+ mov(cb, REG1, uimm_opnd(Qnil.into()));
+ cmove(cb, REG0, REG1);
+
+ // Push the ivar on the stack
+ let out_opnd = ctx.stack_push(Type::Unknown);
+ mov(cb, out_opnd, REG0);
+ }
+
+ // Jump to next instruction. This allows guard chains to share the same successor.
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ EndBlock
+}
+
+fn gen_getinstancevariable(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Defer compilation so we can specialize on a runtime `self`
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let ivar_name = jit_get_arg(jit, 0).as_u64();
+
+ let comptime_val = jit_peek_at_self(jit);
+ let comptime_val_klass = comptime_val.class_of();
+
+ // Generate a side exit
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // Guard that the receiver has the same class as the one from compile time.
+ mov(cb, REG0, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF));
+
+ jit_guard_known_klass(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ comptime_val_klass,
+ SelfOpnd,
+ comptime_val,
+ GET_IVAR_MAX_DEPTH,
+ side_exit,
+ );
+
+ gen_get_ivar(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ GET_IVAR_MAX_DEPTH,
+ comptime_val,
+ ivar_name,
+ SelfOpnd,
+ side_exit,
+ )
+}
+
+fn gen_setinstancevariable(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let id = jit_get_arg(jit, 0);
+ let ic = jit_get_arg(jit, 1).as_u64(); // type IVC
+
+ // Save the PC and SP because the callee may allocate
+ // Note that this modifies REG_SP, which is why we do it first
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // Get the operands from the stack
+ let val_opnd = ctx.stack_pop(1);
+
+ // Call rb_vm_setinstancevariable(iseq, obj, id, val, ic);
+ mov(
+ cb,
+ C_ARG_REGS[1],
+ mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF),
+ );
+ mov(cb, C_ARG_REGS[3], val_opnd);
+ mov(cb, C_ARG_REGS[2], uimm_opnd(id.into()));
+ mov(cb, C_ARG_REGS[4], const_ptr_opnd(ic as *const u8));
+ let iseq = VALUE(jit.iseq as usize);
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], iseq);
+ call_ptr(cb, REG0, rb_vm_setinstancevariable as *const u8);
+
+ KeepCompiling
+}
+
+fn gen_defined(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let op_type = jit_get_arg(jit, 0);
+ let obj = jit_get_arg(jit, 1);
+ let pushval = jit_get_arg(jit, 2);
+
+ // Save the PC and SP because the callee may allocate
+ // Note that this modifies REG_SP, which is why we do it first
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // Get the operands from the stack
+ let v_opnd = ctx.stack_pop(1);
+
+ // Call vm_defined(ec, reg_cfp, op_type, obj, v)
+ mov(cb, C_ARG_REGS[0], REG_EC);
+ mov(cb, C_ARG_REGS[1], REG_CFP);
+ mov(cb, C_ARG_REGS[2], uimm_opnd(op_type.into()));
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], obj);
+ mov(cb, C_ARG_REGS[4], v_opnd);
+ call_ptr(cb, REG0, rb_vm_defined as *const u8);
+
+ // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) {
+ // val = pushval;
+ // }
+ jit_mov_gc_ptr(jit, cb, REG1, pushval);
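+ // The defined-ness result comes back as a C bool, so only the low byte (AL)
+ // of the return register is meaningful here.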
+ cmp(cb, AL, imm_opnd(0));
+ mov(cb, RAX, uimm_opnd(Qnil.into()));
+ cmovnz(cb, RAX, REG1);
+
+ // Push the return value onto the stack
+ let out_type = if pushval.special_const_p() {
+ Type::UnknownImm
+ } else {
+ Type::Unknown
+ };
+ let stack_ret = ctx.stack_push(out_type);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+fn gen_checktype(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let type_val = jit_get_arg(jit, 0).as_u32();
+
+ // Only three types are emitted by compile.c at the moment
+ if let RUBY_T_STRING | RUBY_T_ARRAY | RUBY_T_HASH = type_val {
+ let val_type = ctx.get_opnd_type(StackOpnd(0));
+ let val = ctx.stack_pop(1);
+
+ // Check if we know from type information
+ match (type_val, val_type) {
+ (RUBY_T_STRING, Type::String)
+ | (RUBY_T_ARRAY, Type::Array)
+ | (RUBY_T_HASH, Type::Hash) => {
+ // guaranteed type match
+ let stack_ret = ctx.stack_push(Type::True);
+ mov(cb, stack_ret, uimm_opnd(Qtrue.as_u64()));
+ return KeepCompiling;
+ }
+ _ if val_type.is_imm() || val_type.is_specific() => {
+ // guaranteed not to match T_STRING/T_ARRAY/T_HASH
+ let stack_ret = ctx.stack_push(Type::False);
+ mov(cb, stack_ret, uimm_opnd(Qfalse.as_u64()));
+ return KeepCompiling;
+ }
+ _ => (),
+ }
+
+ mov(cb, REG0, val);
+ mov(cb, REG1, uimm_opnd(Qfalse.as_u64()));
+
+ let ret = cb.new_label("ret".to_string());
+
+ if !val_type.is_heap() {
+ // if (SPECIAL_CONST_P(val)) {
+ // Return Qfalse via REG1 if not on heap
+ test(cb, REG0, uimm_opnd(RUBY_IMMEDIATE_MASK as u64));
+ jnz_label(cb, ret);
+ cmp(cb, REG0, uimm_opnd(Qnil.as_u64()));
+ jbe_label(cb, ret);
+ }
+
+ // Check type on object
+ mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS));
+ and(cb, REG0, uimm_opnd(RUBY_T_MASK as u64));
+ cmp(cb, REG0, uimm_opnd(type_val as u64));
+ mov(cb, REG0, uimm_opnd(Qtrue.as_u64()));
+ // REG1 contains Qfalse from above
+ cmove(cb, REG1, REG0);
+
+ cb.write_label(ret);
+ let stack_ret = ctx.stack_push(Type::UnknownImm);
+ mov(cb, stack_ret, REG1);
+ cb.link_labels();
+
+ KeepCompiling
+ } else {
+ CantCompile
+ }
+}
+
+fn gen_concatstrings(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let n = jit_get_arg(jit, 0);
+
+ // Save the PC and SP because we are allocating
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ let values_ptr = ctx.sp_opnd(-((SIZEOF_VALUE as isize) * n.as_isize()));
+
+ // call rb_str_concat_literals(long n, const VALUE *strings);
+ mov(cb, C_ARG_REGS[0], imm_opnd(n.into()));
+ lea(cb, C_ARG_REGS[1], values_ptr);
+ call_ptr(cb, REG0, rb_str_concat_literals as *const u8);
+
+ ctx.stack_pop(n.as_usize());
+ let stack_ret = ctx.stack_push(Type::String);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
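+// Guard that the two topmost stack operands are both Fixnums, side-exiting if
+// not. Known type information is used to skip runtime checks (or to jump
+// straight to the side exit when they can never both be Fixnums), and the
+// context is upgraded to record the proven Fixnum types.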
+fn guard_two_fixnums(ctx: &mut Context, cb: &mut CodeBlock, side_exit: CodePtr) {
+ // Get the stack operand types
+ let arg1_type = ctx.get_opnd_type(StackOpnd(0));
+ let arg0_type = ctx.get_opnd_type(StackOpnd(1));
+
+ if arg0_type.is_heap() || arg1_type.is_heap() {
+ jmp_ptr(cb, side_exit);
+ return;
+ }
+
+ if arg0_type != Type::Fixnum && arg0_type.is_specific() {
+ jmp_ptr(cb, side_exit);
+ return;
+ }
+
+ if arg1_type != Type::Fixnum && arg1_type.is_specific() {
+ jmp_ptr(cb, side_exit);
+ return;
+ }
+
+ assert!(!arg0_type.is_heap());
+ assert!(!arg1_type.is_heap());
+ assert!(arg0_type == Type::Fixnum || arg0_type.is_unknown());
+ assert!(arg1_type == Type::Fixnum || arg1_type.is_unknown());
+
+ // Get stack operands without popping them
+ let arg1 = ctx.stack_opnd(0);
+ let arg0 = ctx.stack_opnd(1);
+
+ // If not fixnums, fall back
+ if arg0_type != Type::Fixnum {
+ add_comment(cb, "guard arg0 fixnum");
+ test(cb, arg0, uimm_opnd(RUBY_FIXNUM_FLAG as u64));
+ jz_ptr(cb, side_exit);
+ }
+ if arg1_type != Type::Fixnum {
+ add_comment(cb, "guard arg1 fixnum");
+ test(cb, arg1, uimm_opnd(RUBY_FIXNUM_FLAG as u64));
+ jz_ptr(cb, side_exit);
+ }
+
+ // Set stack types in context
+ ctx.upgrade_opnd_type(StackOpnd(0), Type::Fixnum);
+ ctx.upgrade_opnd_type(StackOpnd(1), Type::Fixnum);
+}
+
+// Conditional move operation used by comparison operators
+type CmovFn = fn(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) -> ();
+
+fn gen_fixnum_cmp(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ cmov_op: CmovFn,
+) -> CodegenStatus {
+ // Defer compilation so we can specialize based on a runtime receiver
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let comptime_a = jit_peek_at_stack(jit, ctx, 1);
+ let comptime_b = jit_peek_at_stack(jit, ctx, 0);
+
+ if comptime_a.fixnum_p() && comptime_b.fixnum_p() {
+ // Create a side-exit to fall back to the interpreter
+ // Note: we generate the side-exit before popping operands from the stack
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_LT) {
+ return CantCompile;
+ }
+
+ // Check that both operands are fixnums
+ guard_two_fixnums(ctx, cb, side_exit);
+
+ // Get the operands from the stack
+ let arg1 = ctx.stack_pop(1);
+ let arg0 = ctx.stack_pop(1);
+
+ // Compare the arguments
+ xor(cb, REG0_32, REG0_32); // REG0 = Qfalse
+ mov(cb, REG1, arg0);
+ cmp(cb, REG1, arg1);
+ mov(cb, REG1, uimm_opnd(Qtrue.into()));
+ cmov_op(cb, REG0, REG1);
+
+ // Push the output on the stack
+ let dst = ctx.stack_push(Type::Unknown);
+ mov(cb, dst, REG0);
+
+ KeepCompiling
+ } else {
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+ }
+}
+
+fn gen_opt_lt(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ gen_fixnum_cmp(jit, ctx, cb, ocb, cmovl)
+}
+
+fn gen_opt_le(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ gen_fixnum_cmp(jit, ctx, cb, ocb, cmovle)
+}
+
+fn gen_opt_ge(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ gen_fixnum_cmp(jit, ctx, cb, ocb, cmovge)
+}
+
+fn gen_opt_gt(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ gen_fixnum_cmp(jit, ctx, cb, ocb, cmovg)
+}
+
+// Implements specialized equality for either two fixnums or two strings
+// Returns true if code was generated, otherwise false
+fn gen_equality_specialized(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ side_exit: CodePtr,
+) -> bool {
+ let comptime_a = jit_peek_at_stack(jit, ctx, 1);
+ let comptime_b = jit_peek_at_stack(jit, ctx, 0);
+
+ let a_opnd = ctx.stack_opnd(1);
+ let b_opnd = ctx.stack_opnd(0);
+
+ if comptime_a.fixnum_p() && comptime_b.fixnum_p() {
+ if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_EQ) {
+ // if overridden, emit the generic version
+ return false;
+ }
+
+ guard_two_fixnums(ctx, cb, side_exit);
+
+ mov(cb, REG0, a_opnd);
+ cmp(cb, REG0, b_opnd);
+
+ mov(cb, REG0, imm_opnd(Qfalse.into()));
+ mov(cb, REG1, imm_opnd(Qtrue.into()));
+ cmove(cb, REG0, REG1);
+
+ // Push the output on the stack
+ ctx.stack_pop(2);
+ let dst = ctx.stack_push(Type::UnknownImm);
+ mov(cb, dst, REG0);
+
+ true
+ } else if unsafe { comptime_a.class_of() == rb_cString && comptime_b.class_of() == rb_cString }
+ {
+ if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_EQ) {
+ // if overridden, emit the generic version
+ return false;
+ }
+
+ // Load a and b in preparation for call later
+ mov(cb, C_ARG_REGS[0], a_opnd);
+ mov(cb, C_ARG_REGS[1], b_opnd);
+
+ // Guard that a is a String
+ mov(cb, REG0, C_ARG_REGS[0]);
+ unsafe {
+ // Use of rb_cString here requires an unsafe block
+ jit_guard_known_klass(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ rb_cString,
+ StackOpnd(1),
+ comptime_a,
+ SEND_MAX_DEPTH,
+ side_exit,
+ );
+ }
+
+ let ret = cb.new_label("ret".to_string());
+
+ // If they are equal by identity, return true
+ cmp(cb, C_ARG_REGS[0], C_ARG_REGS[1]);
+ mov(cb, RAX, imm_opnd(Qtrue.into()));
+ je_label(cb, ret);
+
+ // Otherwise guard that b is a T_STRING (from type info) or String (from runtime guard)
+ if ctx.get_opnd_type(StackOpnd(0)) != Type::String {
+ mov(cb, REG0, C_ARG_REGS[1]);
+ // Note: any T_STRING is valid here, but we check for a ::String for simplicity
+ // To pass a mutable static variable (rb_cString) requires an unsafe block
+ unsafe {
+ jit_guard_known_klass(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ rb_cString,
+ StackOpnd(0),
+ comptime_b,
+ SEND_MAX_DEPTH,
+ side_exit,
+ );
+ }
+ }
+
+ // Call rb_str_eql_internal(a, b)
+ call_ptr(cb, REG0, rb_str_eql_internal as *const u8);
+
+ // Push the output on the stack
+ cb.write_label(ret);
+ ctx.stack_pop(2);
+ let dst = ctx.stack_push(Type::UnknownImm);
+ mov(cb, dst, RAX);
+ cb.link_labels();
+
+ true
+ } else {
+ false
+ }
+}
+
+fn gen_opt_eq(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Defer compilation so we can specialize based on a runtime receiver
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ // Create a side-exit to fall back to the interpreter
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ if gen_equality_specialized(jit, ctx, cb, ocb, side_exit) {
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ EndBlock
+ } else {
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+ }
+}
+
+fn gen_opt_neq(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // opt_neq is passed two rb_call_data as arguments:
+ // first for ==, second for !=
+ let cd = jit_get_arg(jit, 1).as_ptr();
+ return gen_send_general(jit, ctx, cb, ocb, cd, None);
+}
+
+fn gen_opt_aref(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let cd: *const rb_call_data = jit_get_arg(jit, 0).as_ptr();
+ let argc = unsafe { vm_ci_argc((*cd).ci) };
+
+ // Only JIT one-arg calls like `ary[6]`
+ if argc != 1 {
+ gen_counter_incr!(cb, oaref_argc_not_one);
+ return CantCompile;
+ }
+
+ // Defer compilation so we can specialize based on a runtime receiver
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ // Remember the context on entry for adding guard chains
+ let starting_context = ctx.clone();
+
+ // Specialize based on compile-time values
+ let comptime_idx = jit_peek_at_stack(jit, ctx, 0);
+ let comptime_recv = jit_peek_at_stack(jit, ctx, 1);
+
+ // Create a side-exit to fall back to the interpreter
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ if comptime_recv.class_of() == unsafe { rb_cArray } && comptime_idx.fixnum_p() {
+ if !assume_bop_not_redefined(jit, ocb, ARRAY_REDEFINED_OP_FLAG, BOP_AREF) {
+ return CantCompile;
+ }
+
+ // Pop the stack operands
+ let idx_opnd = ctx.stack_pop(1);
+ let recv_opnd = ctx.stack_pop(1);
+ mov(cb, REG0, recv_opnd);
+
+ // if (SPECIAL_CONST_P(recv)) {
+ // Bail if receiver is not a heap object
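+ // Qfalse and Qnil don't set any immediate-mask bits, so they need the explicit comparisons below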
+ test(cb, REG0, uimm_opnd(RUBY_IMMEDIATE_MASK as u64));
+ jnz_ptr(cb, side_exit);
+ cmp(cb, REG0, uimm_opnd(Qfalse.into()));
+ je_ptr(cb, side_exit);
+ cmp(cb, REG0, uimm_opnd(Qnil.into()));
+ je_ptr(cb, side_exit);
+
+ // Bail if recv has a class other than ::Array.
+ // BOP_AREF check above is only good for ::Array.
+ mov(cb, REG1, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_KLASS));
+ mov(cb, REG0, uimm_opnd(unsafe { rb_cArray }.into()));
+ cmp(cb, REG0, REG1);
+ jit_chain_guard(
+ JCC_JNE,
+ jit,
+ &starting_context,
+ cb,
+ ocb,
+ OPT_AREF_MAX_CHAIN_DEPTH,
+ side_exit,
+ );
+
+ // Bail if idx is not a FIXNUM
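+ // (fixnums are tagged with the low bit set: VALUE == (n << 1) | 1)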
+ mov(cb, REG1, idx_opnd);
+ test(cb, REG1, uimm_opnd(RUBY_FIXNUM_FLAG as u64));
+ jz_ptr(cb, counted_exit!(ocb, side_exit, oaref_arg_not_fixnum));
+
+ // Call VALUE rb_ary_entry_internal(VALUE ary, long offset).
+ // It never raises or allocates, so we don't need to write to cfp->pc.
+ {
+ mov(cb, RDI, recv_opnd);
+ sar(cb, REG1, uimm_opnd(1)); // Convert fixnum to int
+ mov(cb, RSI, REG1);
+ call_ptr(cb, REG0, rb_ary_entry_internal as *const u8);
+
+ // Push the return value onto the stack
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+ }
+
+ // Jump to next instruction. This allows guard chains to share the same successor.
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ return EndBlock;
+ } else if comptime_recv.class_of() == unsafe { rb_cHash } {
+ if !assume_bop_not_redefined(jit, ocb, HASH_REDEFINED_OP_FLAG, BOP_AREF) {
+ return CantCompile;
+ }
+
+ let key_opnd = ctx.stack_opnd(0);
+ let recv_opnd = ctx.stack_opnd(1);
+
+ // Guard that the receiver is a hash
+ mov(cb, REG0, recv_opnd);
+ jit_guard_known_klass(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ unsafe { rb_cHash },
+ StackOpnd(1),
+ comptime_recv,
+ OPT_AREF_MAX_CHAIN_DEPTH,
+ side_exit,
+ );
+
+ // Setup arguments for rb_hash_aref().
+ mov(cb, C_ARG_REGS[0], REG0);
+ mov(cb, C_ARG_REGS[1], key_opnd);
+
+ // Prepare to call rb_hash_aref(). It might call #hash on the key.
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ call_ptr(cb, REG0, rb_hash_aref as *const u8);
+
+ // Pop the key and the receiver
+ ctx.stack_pop(2);
+
+ // Push the return value onto the stack
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ // Jump to next instruction. This allows guard chains to share the same successor.
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ EndBlock
+ } else {
+ // General case. Call the [] method.
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+ }
+}
+
+fn gen_opt_aset(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Defer compilation so we can specialize on a runtime `self`
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let comptime_recv = jit_peek_at_stack(jit, ctx, 2);
+ let comptime_key = jit_peek_at_stack(jit, ctx, 1);
+
+ // Get the operands from the stack
+ let recv = ctx.stack_opnd(2);
+ let key = ctx.stack_opnd(1);
+ let val = ctx.stack_opnd(0);
+
+ if comptime_recv.class_of() == unsafe { rb_cArray } && comptime_key.fixnum_p() {
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // Guard receiver is an Array
+ mov(cb, REG0, recv);
+ jit_guard_known_klass(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ unsafe { rb_cArray },
+ StackOpnd(2),
+ comptime_recv,
+ SEND_MAX_DEPTH,
+ side_exit,
+ );
+
+ // Guard key is a fixnum
+ mov(cb, REG0, key);
+ jit_guard_known_klass(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ unsafe { rb_cInteger },
+ StackOpnd(1),
+ comptime_key,
+ SEND_MAX_DEPTH,
+ side_exit,
+ );
+
+ // Call rb_ary_store
+ mov(cb, C_ARG_REGS[0], recv);
+ mov(cb, C_ARG_REGS[1], key);
+ sar(cb, C_ARG_REGS[1], uimm_opnd(1)); // FIX2LONG(key)
+ mov(cb, C_ARG_REGS[2], val);
+
+ // We might allocate or raise
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ call_ptr(cb, REG0, rb_ary_store as *const u8);
+
+ // rb_ary_store returns void
+ // stored value should still be on stack
+ mov(cb, REG0, ctx.stack_opnd(0));
+
+ // Push the return value onto the stack
+ ctx.stack_pop(3);
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, REG0);
+
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ return EndBlock;
+ } else if comptime_recv.class_of() == unsafe { rb_cHash } {
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // Guard receiver is a Hash
+ mov(cb, REG0, recv);
+ jit_guard_known_klass(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ unsafe { rb_cHash },
+ StackOpnd(2),
+ comptime_recv,
+ SEND_MAX_DEPTH,
+ side_exit,
+ );
+
+ // Call rb_hash_aset
+ mov(cb, C_ARG_REGS[0], recv);
+ mov(cb, C_ARG_REGS[1], key);
+ mov(cb, C_ARG_REGS[2], val);
+
+ // We might allocate or raise
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ call_ptr(cb, REG0, rb_hash_aset as *const u8);
+
+ // Push the return value onto the stack
+ ctx.stack_pop(3);
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ EndBlock
+ } else {
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+ }
+}
+
+fn gen_opt_and(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Defer compilation so we can specialize on a runtime `self`
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let comptime_a = jit_peek_at_stack(jit, ctx, 1);
+ let comptime_b = jit_peek_at_stack(jit, ctx, 0);
+
+ if comptime_a.fixnum_p() && comptime_b.fixnum_p() {
+ // Create a side-exit to fall back to the interpreter
+ // Note: we generate the side-exit before popping operands from the stack
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_AND) {
+ return CantCompile;
+ }
+
+ // Check that both operands are fixnums
+ guard_two_fixnums(ctx, cb, side_exit);
+
+ // Get the operands and destination from the stack
+ let arg1 = ctx.stack_pop(1);
+ let arg0 = ctx.stack_pop(1);
+
+ // Do the bitwise and arg0 & arg1
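+ // (no untagging needed: both tag bits are 1, so the AND result is still a correctly tagged fixnum)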
+ mov(cb, REG0, arg0);
+ and(cb, REG0, arg1);
+
+ // Push the output on the stack
+ let dst = ctx.stack_push(Type::Fixnum);
+ mov(cb, dst, REG0);
+
+ KeepCompiling
+ } else {
+ // Delegate to send, call the method on the recv
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+ }
+}
+
+fn gen_opt_or(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Defer compilation so we can specialize on a runtime `self`
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let comptime_a = jit_peek_at_stack(jit, ctx, 1);
+ let comptime_b = jit_peek_at_stack(jit, ctx, 0);
+
+ if comptime_a.fixnum_p() && comptime_b.fixnum_p() {
+ // Create a side-exit to fall back to the interpreter
+ // Note: we generate the side-exit before popping operands from the stack
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_OR) {
+ return CantCompile;
+ }
+
+ // Check that both operands are fixnums
+ guard_two_fixnums(ctx, cb, side_exit);
+
+ // Get the operands and destination from the stack
+ let arg1 = ctx.stack_pop(1);
+ let arg0 = ctx.stack_pop(1);
+
+ // Do the bitwise or arg0 | arg1
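+ // (no untagging needed: OR of the two tag bits is still 1, so the result stays a tagged fixnum)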
+ mov(cb, REG0, arg0);
+ or(cb, REG0, arg1);
+
+ // Push the output on the stack
+ let dst = ctx.stack_push(Type::Fixnum);
+ mov(cb, dst, REG0);
+
+ KeepCompiling
+ } else {
+ // Delegate to send, call the method on the recv
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+ }
+}
+
+fn gen_opt_minus(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Defer compilation so we can specialize on a runtime `self`
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let comptime_a = jit_peek_at_stack(jit, ctx, 1);
+ let comptime_b = jit_peek_at_stack(jit, ctx, 0);
+
+ if comptime_a.fixnum_p() && comptime_b.fixnum_p() {
+ // Create a side-exit to fall back to the interpreter
+ // Note: we generate the side-exit before popping operands from the stack
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS) {
+ return CantCompile;
+ }
+
+ // Check that both operands are fixnums
+ guard_two_fixnums(ctx, cb, side_exit);
+
+ // Get the operands and destination from the stack
+ let arg1 = ctx.stack_pop(1);
+ let arg0 = ctx.stack_pop(1);
+
+ // Subtract arg0 - arg1 and test for overflow
+ mov(cb, REG0, arg0);
+ sub(cb, REG0, arg1);
+ jo_ptr(cb, side_exit);
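+ // Re-add the fixnum tag: (2a+1) - (2b+1) == 2(a-b), so the tag bit must be restored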
+ add(cb, REG0, imm_opnd(1));
+
+ // Push the output on the stack
+ let dst = ctx.stack_push(Type::Fixnum);
+ mov(cb, dst, REG0);
+
+ KeepCompiling
+ } else {
+ // Delegate to send, call the method on the recv
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+ }
+}
+
+fn gen_opt_mult(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Delegate to send, call the method on the recv
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+}
+
+fn gen_opt_div(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Delegate to send, call the method on the recv
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+}
+
+fn gen_opt_mod(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Save the PC and SP because the callee may allocate bignums
+ // Note that this modifies REG_SP, which is why we do it first
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // Get the operands from the stack
+ let arg1 = ctx.stack_pop(1);
+ let arg0 = ctx.stack_pop(1);
+
+ // Call rb_vm_opt_mod(VALUE recv, VALUE obj)
+ mov(cb, C_ARG_REGS[0], arg0);
+ mov(cb, C_ARG_REGS[1], arg1);
+ call_ptr(cb, REG0, rb_vm_opt_mod as *const u8);
+
+ // If val == Qundef, bail to do a method call
+ cmp(cb, RAX, imm_opnd(Qundef.as_i64()));
+ je_ptr(cb, side_exit);
+
+ // Push the return value onto the stack
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+fn gen_opt_ltlt(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Delegate to send, call the method on the recv
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+}
+
+fn gen_opt_nil_p(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Delegate to send, call the method on the recv
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+}
+
+fn gen_opt_empty_p(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Delegate to send, call the method on the recv
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+}
+
+fn gen_opt_str_freeze(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_FREEZE) {
+ return CantCompile;
+ }
+
+ let str = jit_get_arg(jit, 0);
+ jit_mov_gc_ptr(jit, cb, REG0, str);
+
+ // Push the return value onto the stack
+ let stack_ret = ctx.stack_push(Type::String);
+ mov(cb, stack_ret, REG0);
+
+ KeepCompiling
+}
+
+fn gen_opt_str_uminus(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_UMINUS) {
+ return CantCompile;
+ }
+
+ let str = jit_get_arg(jit, 0);
+ jit_mov_gc_ptr(jit, cb, REG0, str);
+
+ // Push the return value onto the stack
+ let stack_ret = ctx.stack_push(Type::String);
+ mov(cb, stack_ret, REG0);
+
+ KeepCompiling
+}
+
+fn gen_opt_not(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ return gen_opt_send_without_block(jit, ctx, cb, ocb);
+}
+
+fn gen_opt_size(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ return gen_opt_send_without_block(jit, ctx, cb, ocb);
+}
+
+fn gen_opt_length(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ return gen_opt_send_without_block(jit, ctx, cb, ocb);
+}
+
+fn gen_opt_regexpmatch2(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ return gen_opt_send_without_block(jit, ctx, cb, ocb);
+}
+
+fn gen_opt_case_dispatch(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ _cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Normally this instruction would look up the key in a hash and jump to an
+ // offset based on that.
+ // Instead we can take the fallback case and continue with the next
+ // instruction.
+ // We'd hope that our jitted code will be sufficiently fast without the
+ // hash lookup, at least for small hashes, but it's worth revisiting this
+ // assumption in the future.
+
+ ctx.stack_pop(1);
+
+ KeepCompiling // continue with the next instruction
+}
+
+fn gen_branchif_branch(
+ cb: &mut CodeBlock,
+ target0: CodePtr,
+ target1: Option<CodePtr>,
+ shape: BranchShape,
+) {
+ assert!(target1 != None);
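+ // Next0/Next1 mean target0/target1 is generated immediately after this branch, so only a jump to the other target is needed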
+ match shape {
+ BranchShape::Next0 => {
+ jz_ptr(cb, target1.unwrap());
+ }
+ BranchShape::Next1 => {
+ jnz_ptr(cb, target0);
+ }
+ BranchShape::Default => {
+ jnz_ptr(cb, target0);
+ jmp_ptr(cb, target1.unwrap());
+ }
+ }
+}
+
+fn gen_branchif(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let jump_offset = jit_get_arg(jit, 0).as_i32();
+
+ // Check for interrupts, but only on backward branches that may create loops
+ if jump_offset < 0 {
+ let side_exit = get_side_exit(jit, ocb, ctx);
+ gen_check_ints(cb, side_exit);
+ }
+
+ // Test if any bit (outside of the Qnil bit) is on
+ // RUBY_Qfalse /* ...0000 0000 */
+ // RUBY_Qnil /* ...0000 1000 */
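+ // If any other bit is set, the value is neither Qfalse nor Qnil, and everything else is truthy in Ruby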
+ let val_opnd = ctx.stack_pop(1);
+ test(cb, val_opnd, imm_opnd(!Qnil.as_i64()));
+
+ // Get the branch target instruction offsets
+ let next_idx = jit_next_insn_idx(jit);
+ let jump_idx = (next_idx as i32) + jump_offset;
+ let next_block = BlockId {
+ iseq: jit.iseq,
+ idx: next_idx,
+ };
+ let jump_block = BlockId {
+ iseq: jit.iseq,
+ idx: jump_idx as u32,
+ };
+
+ // Generate the branch instructions
+ gen_branch(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ jump_block,
+ ctx,
+ Some(next_block),
+ Some(ctx),
+ gen_branchif_branch,
+ );
+
+ EndBlock
+}
+
+fn gen_branchunless_branch(
+ cb: &mut CodeBlock,
+ target0: CodePtr,
+ target1: Option<CodePtr>,
+ shape: BranchShape,
+) {
+ match shape {
+ BranchShape::Next0 => jnz_ptr(cb, target1.unwrap()),
+ BranchShape::Next1 => jz_ptr(cb, target0),
+ BranchShape::Default => {
+ jz_ptr(cb, target0);
+ jmp_ptr(cb, target1.unwrap());
+ }
+ }
+}
+
+fn gen_branchunless(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let jump_offset = jit_get_arg(jit, 0).as_i32();
+
+ // Check for interrupts, but only on backward branches that may create loops
+ if jump_offset < 0 {
+ let side_exit = get_side_exit(jit, ocb, ctx);
+ gen_check_ints(cb, side_exit);
+ }
+
+ // Test if any bit (outside of the Qnil bit) is on
+ // RUBY_Qfalse /* ...0000 0000 */
+ // RUBY_Qnil /* ...0000 1000 */
+ let val_opnd = ctx.stack_pop(1);
+ test(cb, val_opnd, imm_opnd(!Qnil.as_i64()));
+
+ // Get the branch target instruction offsets
+ let next_idx = jit_next_insn_idx(jit) as i32;
+ let jump_idx = next_idx + jump_offset;
+ let next_block = BlockId {
+ iseq: jit.iseq,
+ idx: next_idx.try_into().unwrap(),
+ };
+ let jump_block = BlockId {
+ iseq: jit.iseq,
+ idx: jump_idx.try_into().unwrap(),
+ };
+
+ // Generate the branch instructions
+ gen_branch(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ jump_block,
+ ctx,
+ Some(next_block),
+ Some(ctx),
+ gen_branchunless_branch,
+ );
+
+ EndBlock
+}
+
+fn gen_branchnil_branch(
+ cb: &mut CodeBlock,
+ target0: CodePtr,
+ target1: Option<CodePtr>,
+ shape: BranchShape,
+) {
+ match shape {
+ BranchShape::Next0 => jne_ptr(cb, target1.unwrap()),
+ BranchShape::Next1 => je_ptr(cb, target0),
+ BranchShape::Default => {
+ je_ptr(cb, target0);
+ jmp_ptr(cb, target1.unwrap());
+ }
+ }
+}
+
+fn gen_branchnil(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let jump_offset = jit_get_arg(jit, 0).as_i32();
+
+ // Check for interrupts, but only on backward branches that may create loops
+ if jump_offset < 0 {
+ let side_exit = get_side_exit(jit, ocb, ctx);
+ gen_check_ints(cb, side_exit);
+ }
+
+ // Test if the value is Qnil
+ // RUBY_Qnil /* ...0000 1000 */
+ let val_opnd = ctx.stack_pop(1);
+ cmp(cb, val_opnd, uimm_opnd(Qnil.into()));
+
+ // Get the branch target instruction offsets
+ let next_idx = jit_next_insn_idx(jit) as i32;
+ let jump_idx = next_idx + jump_offset;
+ let next_block = BlockId {
+ iseq: jit.iseq,
+ idx: next_idx.try_into().unwrap(),
+ };
+ let jump_block = BlockId {
+ iseq: jit.iseq,
+ idx: jump_idx.try_into().unwrap(),
+ };
+
+ // Generate the branch instructions
+ gen_branch(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ jump_block,
+ ctx,
+ Some(next_block),
+ Some(ctx),
+ gen_branchnil_branch,
+ );
+
+ EndBlock
+}
+
+fn gen_jump(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let jump_offset = jit_get_arg(jit, 0).as_i32();
+
+ // Check for interrupts, but only on backward branches that may create loops
+ if jump_offset < 0 {
+ let side_exit = get_side_exit(jit, ocb, ctx);
+ gen_check_ints(cb, side_exit);
+ }
+
+ // Get the branch target instruction offsets
+ let jump_idx = (jit_next_insn_idx(jit) as i32) + jump_offset;
+ let jump_block = BlockId {
+ iseq: jit.iseq,
+ idx: jump_idx as u32,
+ };
+
+ // Generate the jump instruction
+ gen_direct_jump(jit, ctx, jump_block, cb);
+
+ EndBlock
+}
+
+/// Guard that self or a stack operand has the same class as `known_klass`, using
+/// `sample_instance` to speculate about the shape of the runtime value.
+/// FIXNUM and on-heap integers are treated as if they have distinct classes, and
+/// the guard generated for one will fail for the other.
+///
+/// Recompile as a contingency if possible, or take a side exit as a last resort.
+
+fn jit_guard_known_klass(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ known_klass: VALUE,
+ insn_opnd: InsnOpnd,
+ sample_instance: VALUE,
+ max_chain_depth: i32,
+ side_exit: CodePtr,
+) -> bool {
+ let val_type = ctx.get_opnd_type(insn_opnd);
+
+ if unsafe { known_klass == rb_cNilClass } {
+ assert!(!val_type.is_heap());
+ if val_type != Type::Nil {
+ assert!(val_type.is_unknown());
+
+ add_comment(cb, "guard object is nil");
+ cmp(cb, REG0, imm_opnd(Qnil.into()));
+ jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+
+ ctx.upgrade_opnd_type(insn_opnd, Type::Nil);
+ }
+ } else if unsafe { known_klass == rb_cTrueClass } {
+ assert!(!val_type.is_heap());
+ if val_type != Type::True {
+ assert!(val_type.is_unknown());
+
+ add_comment(cb, "guard object is true");
+ cmp(cb, REG0, imm_opnd(Qtrue.into()));
+ jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+
+ ctx.upgrade_opnd_type(insn_opnd, Type::True);
+ }
+ } else if unsafe { known_klass == rb_cFalseClass } {
+ assert!(!val_type.is_heap());
+ if val_type != Type::False {
+ assert!(val_type.is_unknown());
+
+ add_comment(cb, "guard object is false");
+ assert!(Qfalse.as_i32() == 0);
+ test(cb, REG0, REG0);
+ jit_chain_guard(JCC_JNZ, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+
+ ctx.upgrade_opnd_type(insn_opnd, Type::False);
+ }
+ } else if unsafe { known_klass == rb_cInteger } && sample_instance.fixnum_p() {
+ assert!(!val_type.is_heap());
+ // We will guard fixnum and bignum as though they were separate classes
+ // BIGNUM can be handled by the general else case below
+ if val_type != Type::Fixnum || !val_type.is_imm() {
+ assert!(val_type.is_unknown());
+
+ add_comment(cb, "guard object is fixnum");
+ test(cb, REG0, imm_opnd(RUBY_FIXNUM_FLAG as i64));
+ jit_chain_guard(JCC_JZ, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+ ctx.upgrade_opnd_type(insn_opnd, Type::Fixnum);
+ }
+ } else if unsafe { known_klass == rb_cSymbol } && sample_instance.static_sym_p() {
+ assert!(!val_type.is_heap());
+ // We will guard STATIC vs DYNAMIC as though they were separate classes
+ // DYNAMIC symbols can be handled by the general else case below
+ if val_type != Type::ImmSymbol || !val_type.is_imm() {
+ assert!(val_type.is_unknown());
+
+ add_comment(cb, "guard object is static symbol");
+ assert!(RUBY_SPECIAL_SHIFT == 8);
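+ // Static symbols are encoded as (id << RUBY_SPECIAL_SHIFT) | RUBY_SYMBOL_FLAG, so with a shift of 8 the low byte is exactly the flag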
+ cmp(cb, REG0_8, uimm_opnd(RUBY_SYMBOL_FLAG as u64));
+ jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+ ctx.upgrade_opnd_type(insn_opnd, Type::ImmSymbol);
+ }
+ } else if unsafe { known_klass == rb_cFloat } && sample_instance.flonum_p() {
+ assert!(!val_type.is_heap());
+ if val_type != Type::Flonum || !val_type.is_imm() {
+ assert!(val_type.is_unknown());
+
+ // We will guard flonum vs heap float as though they were separate classes
+ add_comment(cb, "guard object is flonum");
+ mov(cb, REG1, REG0);
+ and(cb, REG1, uimm_opnd(RUBY_FLONUM_MASK as u64));
+ cmp(cb, REG1, uimm_opnd(RUBY_FLONUM_FLAG as u64));
+ jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+ ctx.upgrade_opnd_type(insn_opnd, Type::Flonum);
+ }
+ } else if unsafe {
+ FL_TEST(known_klass, VALUE(RUBY_FL_SINGLETON)) != VALUE(0)
+ && sample_instance == rb_attr_get(known_klass, id__attached__ as ID)
+ } {
+ // Singleton classes are attached to one specific object, so we can
+ // avoid one memory access (and potentially the is_heap check) by
+ // looking for the expected object directly.
+ // Note that in case the sample instance has a singleton class that
+ // doesn't attach to the sample instance, it means the sample instance
+ // has an empty singleton class that hasn't been materialized yet. In
+ // this case, comparing against the sample instance doesn't guarantee
+ // that its singleton class is empty, so we can't avoid the memory
+ // access. As an example, `Object.new.singleton_class` is an object in
+ // this situation.
+ add_comment(cb, "guard known object with singleton class");
+ // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the object.
+ jit_mov_gc_ptr(jit, cb, REG1, sample_instance);
+ cmp(cb, REG0, REG1);
+ jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+ } else {
+ assert!(!val_type.is_imm());
+
+ // Check that the receiver is a heap object
+ // Note: if we get here, the class doesn't have immediate instances.
+ if !val_type.is_heap() {
+ add_comment(cb, "guard not immediate");
+ assert!(Qfalse.as_i32() < Qnil.as_i32());
+ test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK as i64));
+ jit_chain_guard(JCC_JNZ, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+ cmp(cb, REG0, imm_opnd(Qnil.into()));
+ jit_chain_guard(JCC_JBE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+
+ ctx.upgrade_opnd_type(insn_opnd, Type::UnknownHeap);
+ }
+
+ let klass_opnd = mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_KLASS);
+
+ // Bail if receiver class is different from known_klass
+ // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the class.
+ add_comment(cb, "guard known class");
+ jit_mov_gc_ptr(jit, cb, REG1, known_klass);
+ cmp(cb, klass_opnd, REG1);
+ jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+ }
+
+ true
+}
+
+// Generate ancestry guard for protected callee.
+// Calls to protected callees only go through when self.is_a?(klass_that_defines_the_callee).
+fn jit_protected_callee_ancestry_guard(
+ jit: &mut JITState,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ cme: *const rb_callable_method_entry_t,
+ side_exit: CodePtr,
+) {
+ // See vm_call_method().
+ mov(
+ cb,
+ C_ARG_REGS[0],
+ mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF),
+ );
+ let def_class = unsafe { (*cme).defined_class };
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], def_class);
+ // Note: PC isn't written to current control frame as rb_is_kind_of() shouldn't raise.
+ // VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass);
+
+ call_ptr(cb, REG0, rb_obj_is_kind_of as *mut u8);
+ test(cb, RAX, RAX);
+ jz_ptr(
+ cb,
+ counted_exit!(ocb, side_exit, send_se_protected_check_failed),
+ );
+}
+
+// Codegen for rb_obj_not().
+// Note: the caller is responsible for generating all the right guards, including
+// arity guards.
+fn jit_rb_obj_not(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<IseqPtr>,
+ _argc: i32,
+ _known_recv_class: *const VALUE,
+) -> bool {
+ let recv_opnd = ctx.get_opnd_type(StackOpnd(0));
+
+ if recv_opnd == Type::Nil || recv_opnd == Type::False {
+ add_comment(cb, "rb_obj_not(nil_or_false)");
+ ctx.stack_pop(1);
+ let out_opnd = ctx.stack_push(Type::True);
+ mov(cb, out_opnd, uimm_opnd(Qtrue.into()));
+ } else if recv_opnd.is_heap() || recv_opnd.is_specific() {
+ // Note: recv_opnd != Type::Nil && recv_opnd != Type::False.
+ add_comment(cb, "rb_obj_not(truthy)");
+ ctx.stack_pop(1);
+ let out_opnd = ctx.stack_push(Type::False);
+ mov(cb, out_opnd, uimm_opnd(Qfalse.into()));
+ } else {
+ // jit_guard_known_klass() already ran on the receiver, which should
+ // have deduced the type of the receiver. This case should be
+ // rare if not unreachable.
+ return false;
+ }
+ true
+}
+
+// Codegen for rb_true()
+fn jit_rb_true(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<IseqPtr>,
+ _argc: i32,
+ _known_recv_class: *const VALUE,
+) -> bool {
+ add_comment(cb, "nil? == true");
+ ctx.stack_pop(1);
+ let stack_ret = ctx.stack_push(Type::True);
+ mov(cb, stack_ret, uimm_opnd(Qtrue.into()));
+ true
+}
+
+// Codegen for rb_false()
+fn jit_rb_false(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<IseqPtr>,
+ _argc: i32,
+ _known_recv_class: *const VALUE,
+) -> bool {
+ add_comment(cb, "nil? == false");
+ ctx.stack_pop(1);
+ let stack_ret = ctx.stack_push(Type::False);
+ mov(cb, stack_ret, uimm_opnd(Qfalse.into()));
+ true
+}
+
+// Codegen for rb_obj_equal()
+// object identity comparison
+fn jit_rb_obj_equal(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<IseqPtr>,
+ _argc: i32,
+ _known_recv_class: *const VALUE,
+) -> bool {
+ add_comment(cb, "equal?");
+ let obj1 = ctx.stack_pop(1);
+ let obj2 = ctx.stack_pop(1);
+
+ mov(cb, REG0, obj1);
+ cmp(cb, REG0, obj2);
+ mov(cb, REG0, uimm_opnd(Qtrue.into()));
+ mov(cb, REG1, uimm_opnd(Qfalse.into()));
+ cmovne(cb, REG0, REG1);
+
+ let stack_ret = ctx.stack_push(Type::UnknownImm);
+ mov(cb, stack_ret, REG0);
+ true
+}
+
+fn jit_rb_str_bytesize(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<IseqPtr>,
+ _argc: i32,
+ _known_recv_class: *const VALUE,
+) -> bool {
+ add_comment(cb, "String#bytesize");
+
+ let recv = ctx.stack_pop(1);
+ mov(cb, C_ARG_REGS[0], recv);
+ call_ptr(cb, REG0, rb_str_bytesize as *const u8);
+
+ let out_opnd = ctx.stack_push(Type::Fixnum);
+ mov(cb, out_opnd, RAX);
+
+ true
+}
+
+// Codegen for rb_str_to_s()
+// When String#to_s is called on a String instance, the method returns self and
+// most of the overhead comes from setting up the method call. We observed that
+// this situation happens a lot in some workloads.
+fn jit_rb_str_to_s(
+ _jit: &mut JITState,
+ _ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<IseqPtr>,
+ _argc: i32,
+ known_recv_class: *const VALUE,
+) -> bool {
+ if !known_recv_class.is_null() && unsafe { *known_recv_class == rb_cString } {
+ add_comment(cb, "to_s on plain string");
+ // The method returns the receiver, which is already on the stack.
+ // No stack movement.
+ return true;
+ }
+ false
+}
+
+fn jit_thread_s_current(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<IseqPtr>,
+ _argc: i32,
+ _known_recv_class: *const VALUE,
+) -> bool {
+ add_comment(cb, "Thread.current");
+ ctx.stack_pop(1);
+
+ // ec->thread_ptr
+ let ec_thread_ptr = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_THREAD_PTR);
+ mov(cb, REG0, ec_thread_ptr);
+
+ // thread->self
+ let thread_self = mem_opnd(64, REG0, RUBY_OFFSET_THREAD_SELF);
+ mov(cb, REG0, thread_self);
+
+ let stack_ret = ctx.stack_push(Type::UnknownHeap);
+ mov(cb, stack_ret, REG0);
+ true
+}
+
+// Check if we know how to codegen for a particular cfunc method
+fn lookup_cfunc_codegen(def: *const rb_method_definition_t) -> Option<MethodGenFn> {
+ let method_serial = unsafe { get_def_method_serial(def) };
+
+ CodegenGlobals::look_up_codegen_method(method_serial)
+}
+
+// Is anyone listening for :c_call and :c_return events currently?
+fn c_method_tracing_currently_enabled(jit: &JITState) -> bool {
+ // Defer to C implementation in yjit.c
+ unsafe {
+ rb_c_method_tracing_currently_enabled(jit.ec.unwrap() as *mut rb_execution_context_struct)
+ }
+}
+
+// Similar to args_kw_argv_to_hash. It is called at runtime from within the
+// generated assembly to build a Ruby hash of the passed keyword arguments. The
+// keys are the Symbol objects associated with the keywords and the values are
+// the actual values. In the representation, both keys and values are VALUEs.
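+// For example, a call site like `foo(a: 1, b: 2)` results in the hash `{a: 1, b: 2}` being built here.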
+unsafe extern "C" fn build_kwhash(ci: *const rb_callinfo, sp: *const VALUE) -> VALUE {
+ let kw_arg = vm_ci_kwarg(ci);
+ let kw_len: usize = get_cikw_keyword_len(kw_arg).try_into().unwrap();
+ let hash = rb_hash_new_with_size(kw_len as u64);
+
+ for kwarg_idx in 0..kw_len {
+ let key = get_cikw_keywords_idx(kw_arg, kwarg_idx.try_into().unwrap());
+ let val = sp.sub(kw_len).add(kwarg_idx).read();
+ rb_hash_aset(hash, key, val);
+ }
+ hash
+}
+
+fn gen_send_cfunc(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ ci: *const rb_callinfo,
+ cme: *const rb_callable_method_entry_t,
+ block: Option<IseqPtr>,
+ argc: i32,
+ recv_known_klass: *const VALUE,
+) -> CodegenStatus {
+ let cfunc = unsafe { get_cme_def_body_cfunc(cme) };
+ let cfunc_argc = unsafe { get_mct_argc(cfunc) };
+
+ // If the function expects a Ruby array of arguments (cfunc_argc == -2)
+ if cfunc_argc < 0 && cfunc_argc != -1 {
+ gen_counter_incr!(cb, send_cfunc_ruby_array_varg);
+ return CantCompile;
+ }
+
+ let kw_arg = unsafe { vm_ci_kwarg(ci) };
+ let kw_arg_num = if kw_arg.is_null() {
+ 0
+ } else {
+ unsafe { get_cikw_keyword_len(kw_arg) }
+ };
+
+ // Number of args which will be passed through to the callee
+ // This is adjusted by the kwargs being combined into a hash.
+ let passed_argc = if kw_arg.is_null() {
+ argc
+ } else {
+ argc - kw_arg_num + 1
+ };
+
+ // If the argument count doesn't match
+ if cfunc_argc >= 0 && cfunc_argc != passed_argc {
+ gen_counter_incr!(cb, send_cfunc_argc_mismatch);
+ return CantCompile;
+ }
+
+ // Don't JIT functions that need C stack arguments for now
+ if cfunc_argc >= 0 && passed_argc + 1 > (C_ARG_REGS.len() as i32) {
+ gen_counter_incr!(cb, send_cfunc_toomany_args);
+ return CantCompile;
+ }
+
+ if c_method_tracing_currently_enabled(jit) {
+ // Don't JIT if tracing c_call or c_return
+ gen_counter_incr!(cb, send_cfunc_tracing);
+ return CantCompile;
+ }
+
+ // Delegate to codegen for C methods if we have it.
+ if kw_arg.is_null() {
+ let codegen_p = lookup_cfunc_codegen(unsafe { (*cme).def });
+ if codegen_p.is_some() {
+ let known_cfunc_codegen = codegen_p.unwrap();
+ if known_cfunc_codegen(jit, ctx, cb, ocb, ci, cme, block, argc, recv_known_klass) {
+ // cfunc codegen generated code. Terminate the block so
+ // there aren't multiple calls in the same block.
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+ }
+ }
+
+ // Create a side-exit to fall back to the interpreter
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // Check for interrupts
+ gen_check_ints(cb, side_exit);
+
+ // Stack overflow check
+ // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
+ // REG_CFP <= REG_SP + 4 * SIZEOF_VALUE + 2 * sizeof(rb_control_frame_t)
+ // (the doubled control frame margin mirrors vm_push_frame(), which checks against a decremented cfp)
+ add_comment(cb, "stack overflow check");
+ lea(
+ cb,
+ REG0,
+ ctx.sp_opnd((SIZEOF_VALUE * 4 + 2 * RUBY_SIZEOF_CONTROL_FRAME) as isize),
+ );
+ cmp(cb, REG_CFP, REG0);
+ jle_ptr(cb, counted_exit!(ocb, side_exit, send_se_cf_overflow));
+
+ // Points to the receiver operand on the stack
+ let recv = ctx.stack_opnd(argc);
+
+ // Store incremented PC into current control frame in case callee raises.
+ jit_save_pc(jit, cb, REG0);
+
+ if let Some(block_iseq) = block {
+ // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
+ // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases
+ // with cfp->block_code.
+ jit_mov_gc_ptr(jit, cb, REG0, VALUE(block_iseq as usize));
+ let block_code_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BLOCK_CODE);
+ mov(cb, block_code_opnd, REG0);
+ }
+
+ // Increment the stack pointer by 3 (in the callee)
+ // sp += 3
+ lea(cb, REG0, ctx.sp_opnd((SIZEOF_VALUE as isize) * 3));
+
+ // Write method entry at sp[-3]
+ // sp[-3] = me;
+ // Put compile time cme into REG1. It's assumed to be valid because we are notified when
+ // any cme we depend on become outdated. See yjit_method_lookup_change().
+ jit_mov_gc_ptr(jit, cb, REG1, VALUE(cme as usize));
+ mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
+
+ // Write block handler at sp[-2]
+ // sp[-2] = block_handler;
+ if let Some(_block_iseq) = block {
+ // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
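+ // (an iseq block handler is the captured block pointer with its low bit tagged to 1)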
+ let cfp_self = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF);
+ lea(cb, REG1, cfp_self);
+ or(cb, REG1, imm_opnd(1));
+ mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
+ } else {
+ let dst_opnd = mem_opnd(64, REG0, 8 * -2);
+ mov(cb, dst_opnd, uimm_opnd(VM_BLOCK_HANDLER_NONE.into()));
+ }
+
+ // Write env flags at sp[-1]
+ // sp[-1] = frame_type;
+ let mut frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL;
+ if !kw_arg.is_null() {
+ frame_type |= VM_FRAME_FLAG_CFRAME_KW
+ }
+ mov(cb, mem_opnd(64, REG0, 8 * -1), uimm_opnd(frame_type.into()));
+
+ // Allocate a new CFP (ec->cfp--)
+ let ec_cfp_opnd = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP);
+ sub(cb, ec_cfp_opnd, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64));
+
+ // Setup the new frame
+ // *cfp = (const struct rb_control_frame_struct) {
+ // .pc = 0,
+ // .sp = sp,
+ // .iseq = 0,
+ // .self = recv,
+ // .ep = sp - 1,
+ // .block_code = 0,
+ // .__bp__ = sp,
+ // };
+
+ // Can we re-use ec_cfp_opnd from above?
+ let ec_cfp_opnd = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP);
+ mov(cb, REG1, ec_cfp_opnd);
+ mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_PC), imm_opnd(0));
+
+ mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_SP), REG0);
+ mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_ISEQ), imm_opnd(0));
+ mov(
+ cb,
+ mem_opnd(64, REG1, RUBY_OFFSET_CFP_BLOCK_CODE),
+ imm_opnd(0),
+ );
+ mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_BP), REG0);
+ sub(cb, REG0, uimm_opnd(SIZEOF_VALUE as u64));
+ mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_EP), REG0);
+ mov(cb, REG0, recv);
+ mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_SELF), REG0);
+
+ /*
+ // Verify that we are calling the right function
+ if (YJIT_CHECK_MODE > 0) { // TODO: will we have a YJIT_CHECK_MODE?
+ // Call check_cfunc_dispatch
+ mov(cb, C_ARG_REGS[0], recv);
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], (VALUE)ci);
+ mov(cb, C_ARG_REGS[2], const_ptr_opnd((void *)cfunc->func));
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], (VALUE)cme);
+ call_ptr(cb, REG0, (void *)&check_cfunc_dispatch);
+ }
+ */
+
+ if !kw_arg.is_null() {
+ // Build a hash from all kwargs passed
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], VALUE(ci as usize));
+ lea(cb, C_ARG_REGS[1], ctx.sp_opnd(0));
+ call_ptr(cb, REG0, build_kwhash as *const u8);
+
+ // Replace the stack location at the start of kwargs with the new hash
+ let stack_opnd = ctx.stack_opnd(argc - passed_argc);
+ mov(cb, stack_opnd, RAX);
+ }
+
+ // Copy SP into RAX because REG_SP will get overwritten
+ lea(cb, RAX, ctx.sp_opnd(0));
+
+ // Pop the C function arguments from the stack (in the caller)
+ ctx.stack_pop((argc + 1).try_into().unwrap());
+
+ // Write interpreter SP into CFP.
+ // Needed in case the callee yields to the block.
+ gen_save_sp(cb, ctx);
+
+ // Non-variadic method
+ if cfunc_argc >= 0 {
+ // Copy the arguments from the stack to the C argument registers
+ // self is the 0th argument and is at index argc from the stack top
+ for i in 0..=passed_argc as usize {
+ // "as usize?" Yeah, you can't index an array by an i32.
+ let stack_opnd = mem_opnd(64, RAX, -(argc + 1 - (i as i32)) * SIZEOF_VALUE_I32);
+ let c_arg_reg = C_ARG_REGS[i];
+ mov(cb, c_arg_reg, stack_opnd);
+ }
+ }
+
+ // Variadic method
+ if cfunc_argc == -1 {
+ // The method gets a pointer to the first argument
+ // rb_f_puts(int argc, VALUE *argv, VALUE recv)
+ mov(cb, C_ARG_REGS[0], imm_opnd(passed_argc.into()));
+ lea(
+ cb,
+ C_ARG_REGS[1],
+ mem_opnd(64, RAX, -(argc) * SIZEOF_VALUE_I32),
+ );
+ mov(
+ cb,
+ C_ARG_REGS[2],
+ mem_opnd(64, RAX, -(argc + 1) * SIZEOF_VALUE_I32),
+ );
+ }
+
+ // Call the C function
+ // VALUE ret = (cfunc->func)(recv, argv[0], argv[1]);
+ // cfunc comes from compile-time cme->def, which we assume to be stable.
+ // Invalidation logic is in yjit_method_lookup_change()
+ add_comment(cb, "call C function");
+ call_ptr(cb, REG0, unsafe { get_mct_func(cfunc) });
+
+ // Record code position for TracePoint patching. See full_cfunc_return().
+ record_global_inval_patch(cb, CodegenGlobals::get_outline_full_cfunc_return_pos());
+
+ // Push the return value on the Ruby stack
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ // Pop the stack frame (ec->cfp++)
+ // Can we reuse ec_cfp_opnd from above?
+ let ec_cfp_opnd = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP);
+ add(cb, ec_cfp_opnd, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64));
+
+ // cfunc calls may corrupt types
+ ctx.clear_local_types();
+
+ // Note: the return block of gen_send_iseq() has ctx->sp_offset == 1
+ // which allows for sharing the same successor.
+
+ // Jump (fall through) to the call continuation block
+ // We do this to end the current block after the call
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ EndBlock
+}
+
+fn gen_return_branch(
+ cb: &mut CodeBlock,
+ target0: CodePtr,
+ _target1: Option<CodePtr>,
+ shape: BranchShape,
+) {
+ match shape {
+ BranchShape::Next0 | BranchShape::Next1 => unreachable!(),
+ BranchShape::Default => {
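+ // Write the caller's continuation address into the callee's cfp->jit_return so the callee's leave can jump straight back to JIT code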
+ mov(cb, REG0, code_ptr_opnd(target0));
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_JIT_RETURN), REG0);
+ }
+ }
+}
+
+fn gen_send_iseq(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ ci: *const rb_callinfo,
+ cme: *const rb_callable_method_entry_t,
+ block: Option<IseqPtr>,
+ argc: i32,
+) -> CodegenStatus {
+ let iseq = unsafe { get_def_iseq_ptr((*cme).def) };
+ let mut argc = argc;
+
+ // When you have keyword arguments, there is an extra object that gets
+ // placed on the stack that represents a bitmap of the keywords that were not
+ // specified at the call site. We need to keep track of the fact that this
+ // value is present on the stack in order to properly set up the callee's
+ // stack pointer.
+ let doing_kw_call = unsafe { get_iseq_flags_has_kw(iseq) };
+ let supplying_kws = unsafe { vm_ci_flag(ci) & VM_CALL_KWARG } != 0;
+
+ if unsafe { vm_ci_flag(ci) } & VM_CALL_TAILCALL != 0 {
+ // We can't handle tailcalls
+ gen_counter_incr!(cb, send_iseq_tailcall);
+ return CantCompile;
+ }
+
+ // No support for callees with these parameters yet as they require allocation
+ // or complex handling.
+ if unsafe {
+ get_iseq_flags_has_rest(iseq)
+ || get_iseq_flags_has_post(iseq)
+ || get_iseq_flags_has_kwrest(iseq)
+ } {
+ gen_counter_incr!(cb, send_iseq_complex_callee);
+ return CantCompile;
+ }
+
+ // If we have keyword arguments being passed to a callee that only takes
+ // positionals, then we need to allocate a hash. For now we're going to
+ // call that too complex and bail.
+ if supplying_kws && !unsafe { get_iseq_flags_has_kw(iseq) } {
+ gen_counter_incr!(cb, send_iseq_complex_callee);
+ return CantCompile;
+ }
+
+ // If we have a method accepting no kwargs (**nil), exit if we have passed
+ // it any kwargs.
+ if supplying_kws && unsafe { get_iseq_flags_has_accepts_no_kwarg(iseq) } {
+ gen_counter_incr!(cb, send_iseq_complex_callee);
+ return CantCompile;
+ }
+
+ // For computing number of locals to set up for the callee
+ let mut num_params = unsafe { get_iseq_body_param_size(iseq) };
+
+ // Block parameter handling. This mirrors setup_parameters_complex().
+ if unsafe { get_iseq_flags_has_block(iseq) } {
+ if unsafe { get_iseq_body_local_iseq(iseq) == iseq } {
+ num_params -= 1;
+ } else {
+ // In this case (param.flags.has_block && local_iseq != iseq),
+ // the block argument is set up as a local variable and requires
+ // materialization (allocation). Bail.
+ gen_counter_incr!(cb, send_iseq_complex_callee);
+ return CantCompile;
+ }
+ }
+
+ let mut start_pc_offset = 0;
+ let required_num = unsafe { get_iseq_body_param_lead_num(iseq) };
+
+ // This struct represents the metadata about the caller-specified
+ // keyword arguments.
+ let kw_arg = unsafe { vm_ci_kwarg(ci) };
+ let kw_arg_num = if kw_arg.is_null() {
+ 0
+ } else {
+ unsafe { get_cikw_keyword_len(kw_arg) }
+ };
+
+ // Arity handling and optional parameter setup
+ let opts_filled = argc - required_num - kw_arg_num;
+ let opt_num = unsafe { get_iseq_body_param_opt_num(iseq) };
+ let opts_missing: i32 = opt_num - opts_filled;
+
+ if opts_filled < 0 || opts_filled > opt_num {
+ gen_counter_incr!(cb, send_iseq_arity_error);
+ return CantCompile;
+ }
+
+ // If we have unfilled optional arguments and keyword arguments then we
+ // would need to adjust the argument locations to account for that.
+ // For now we aren't handling this case.
+ if doing_kw_call && opts_missing > 0 {
+ gen_counter_incr!(cb, send_iseq_complex_callee);
+ return CantCompile;
+ }
+
+ if opt_num > 0 {
+ num_params -= opts_missing as u32;
+ unsafe {
+ let opt_table = get_iseq_body_param_opt_table(iseq);
+ start_pc_offset = (*opt_table.offset(opts_filled as isize)).as_u32();
+ }
+ }
+
+ if doing_kw_call {
+ // Here we're calling a method with keyword arguments and specifying
+ // keyword arguments at this call site.
+
+ // This struct represents the metadata about the callee-specified
+ // keyword parameters.
+ let keyword = unsafe { get_iseq_body_param_keyword(iseq) };
+ let keyword_num: usize = unsafe { (*keyword).num }.try_into().unwrap();
+ let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap();
+
+ let mut required_kwargs_filled = 0;
+
+ if keyword_num > 30 {
+ // We have so many keywords that (1 << num) encoded as a FIXNUM
+ // (which shifts it left one more) no longer fits inside a 32-bit
+ // immediate.
+ gen_counter_incr!(cb, send_iseq_complex_callee);
+ return CantCompile;
+ }
+
+ // Check that the kwargs being passed are valid
+ if supplying_kws {
+ // This is the list of keyword arguments that the callee specified
+ // in its initial declaration.
+ // SAFETY: see compile.c for sizing of this slice.
+ let callee_kwargs = unsafe { slice::from_raw_parts((*keyword).table, keyword_num) };
+
+ // Here we're going to build up a list of the IDs that correspond to
+ // the caller-specified keyword arguments. If they're not in the
+ // same order as the order specified in the callee declaration, then
+ // we're going to need to generate some code to swap values around
+ // on the stack.
+ let kw_arg_keyword_len: usize =
+ unsafe { get_cikw_keyword_len(kw_arg) }.try_into().unwrap();
+ let mut caller_kwargs: Vec<ID> = vec![0; kw_arg_keyword_len];
+ for kwarg_idx in 0..kw_arg_keyword_len {
+ let sym = unsafe { get_cikw_keywords_idx(kw_arg, kwarg_idx.try_into().unwrap()) };
+ caller_kwargs[kwarg_idx] = unsafe { rb_sym2id(sym) };
+ }
+
+ // First, we're going to be sure that the names of every
+ // caller-specified keyword argument correspond to a name in the
+ // list of callee-specified keyword parameters.
+ for caller_kwarg in caller_kwargs {
+ let search_result = callee_kwargs
+ .iter()
+ .enumerate() // inject element index
+ .find(|(_, &kwarg)| kwarg == caller_kwarg);
+
+ match search_result {
+ None => {
+ // If the keyword was never found, then we know we have a
+ // mismatch in the names of the keyword arguments, so we need to
+ // bail.
+ gen_counter_incr!(cb, send_iseq_kwargs_mismatch);
+ return CantCompile;
+ }
+ Some((callee_idx, _)) if callee_idx < keyword_required_num => {
+ // Keep a count to ensure all required kwargs are specified
+ required_kwargs_filled += 1;
+ }
+ _ => (),
+ }
+ }
+ }
+ assert!(required_kwargs_filled <= keyword_required_num);
+ if required_kwargs_filled != keyword_required_num {
+ gen_counter_incr!(cb, send_iseq_kwargs_mismatch);
+ return CantCompile;
+ }
+ }
+
+ // Number of locals that are not parameters
+ let num_locals = unsafe { get_iseq_body_local_table_size(iseq) as i32 } - (num_params as i32);
+
+ // Create a side-exit to fall back to the interpreter
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // Check for interrupts
+ gen_check_ints(cb, side_exit);
+
+ let leaf_builtin_raw = unsafe { rb_leaf_builtin_function(iseq) };
+ let leaf_builtin: Option<*const rb_builtin_function> = if leaf_builtin_raw.is_null() {
+ None
+ } else {
+ Some(leaf_builtin_raw)
+ };
+ if let (None, Some(builtin_info)) = (block, leaf_builtin) {
+ let builtin_argc = unsafe { (*builtin_info).argc };
+ if builtin_argc + 1 /* for self */ + 1 /* for ec */ <= (C_ARG_REGS.len() as i32) {
+ add_comment(cb, "inlined leaf builtin");
+
+ // Call the builtin func (ec, recv, arg1, arg2, ...)
+ mov(cb, C_ARG_REGS[0], REG_EC);
+
+ // Copy self and arguments
+ for i in 0..=builtin_argc {
+ let stack_opnd = ctx.stack_opnd(builtin_argc - i);
+ let idx: usize = (i + 1).try_into().unwrap();
+ let c_arg_reg = C_ARG_REGS[idx];
+ mov(cb, c_arg_reg, stack_opnd);
+ }
+ ctx.stack_pop((builtin_argc + 1).try_into().unwrap());
+ let builtin_func_ptr = unsafe { (*builtin_info).func_ptr as *const u8 };
+ call_ptr(cb, REG0, builtin_func_ptr);
+
+ // Push the return value
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ // Note: assuming that the leaf builtin doesn't change local variables here.
+ // Seems like a safe assumption.
+
+ return KeepCompiling;
+ }
+ }
+
+ // Stack overflow check
+ // Note that vm_push_frame checks it against a decremented cfp, hence the multiply by 2.
+ // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
+ add_comment(cb, "stack overflow check");
+ let stack_max: i32 = unsafe { get_iseq_body_stack_max(iseq) }.try_into().unwrap();
+ let locals_offs =
+ (SIZEOF_VALUE as i32) * (num_locals + stack_max) + 2 * (RUBY_SIZEOF_CONTROL_FRAME as i32);
+ lea(cb, REG0, ctx.sp_opnd(locals_offs as isize));
+ cmp(cb, REG_CFP, REG0);
+ jle_ptr(cb, counted_exit!(ocb, side_exit, send_se_cf_overflow));
+
+ if doing_kw_call {
+ // Here we're calling a method with keyword arguments and specifying
+ // keyword arguments at this call site.
+
+ // Number of positional arguments the callee expects before the first
+ // keyword argument
+ let args_before_kw = required_num + opt_num;
+
+ // This struct represents the metadata about the caller-specified
+ // keyword arguments.
+ let ci_kwarg = unsafe { vm_ci_kwarg(ci) };
+ let caller_keyword_len: usize = if ci_kwarg.is_null() {
+ 0
+ } else {
+ unsafe { get_cikw_keyword_len(ci_kwarg) }
+ .try_into()
+ .unwrap()
+ };
+
+ // This struct represents the metadata about the callee-specified
+ // keyword parameters.
+ let keyword = unsafe { get_iseq_body_param_keyword(iseq) };
+
+ add_comment(cb, "keyword args");
+
+ // This is the list of keyword arguments that the callee specified
+ // in its initial declaration.
+ let callee_kwargs = unsafe { (*keyword).table };
+ let total_kwargs: usize = unsafe { (*keyword).num }.try_into().unwrap();
+
+ // Here we're going to build up a list of the IDs that correspond to
+ // the caller-specified keyword arguments. If they're not in the
+ // same order as the order specified in the callee declaration, then
+ // we're going to need to generate some code to swap values around
+ // on the stack.
+ let mut caller_kwargs: Vec<ID> = vec![0; total_kwargs];
+
+ for kwarg_idx in 0..caller_keyword_len {
+ let sym = unsafe { get_cikw_keywords_idx(ci_kwarg, kwarg_idx.try_into().unwrap()) };
+ caller_kwargs[kwarg_idx] = unsafe { rb_sym2id(sym) };
+ }
+ let mut kwarg_idx = caller_keyword_len;
+
+ let mut unspecified_bits = 0;
+
+ let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap();
+ for callee_idx in keyword_required_num..total_kwargs {
+ let mut already_passed = false;
+ let callee_kwarg = unsafe { *(callee_kwargs.offset(callee_idx.try_into().unwrap())) };
+
+ for caller_idx in 0..caller_keyword_len {
+ if caller_kwargs[caller_idx] == callee_kwarg {
+ already_passed = true;
+ break;
+ }
+ }
+
+ if !already_passed {
+ // Reserve space on the stack for each default value we'll be
+ // filling in (which is done in the next loop). Also increments
+ // argc so that the callee's SP is recorded correctly.
+ argc += 1;
+ let default_arg = ctx.stack_push(Type::Unknown);
+
+ // callee_idx - keyword->required_num is used in a couple of places below.
+ let req_num: isize = unsafe { (*keyword).required_num }.try_into().unwrap();
+ let callee_idx_isize: isize = callee_idx.try_into().unwrap();
+ let extra_args = callee_idx_isize - req_num;
+
+ //VALUE default_value = keyword->default_values[callee_idx - keyword->required_num];
+ let mut default_value = unsafe { *((*keyword).default_values.offset(extra_args)) };
+
+ if default_value == Qundef {
+ // Qundef means that this value is not constant and must be
+ // recalculated at runtime, so we record it in unspecified_bits
+ // (Qnil is then used as a placeholder instead of Qundef).
+ unspecified_bits |= 0x01 << extra_args;
+ default_value = Qnil;
+ }
+
+ jit_mov_gc_ptr(jit, cb, REG0, default_value);
+ mov(cb, default_arg, REG0);
+
+ caller_kwargs[kwarg_idx] = callee_kwarg;
+ kwarg_idx += 1;
+ }
+ }
+
+ assert!(kwarg_idx == total_kwargs);
+
+ // Next, we're going to loop through every keyword that was
+ // specified by the caller and make sure that it's in the correct
+ // place. If it's not we're going to swap it around with another one.
+ for kwarg_idx in 0..total_kwargs {
+ let kwarg_idx_isize: isize = kwarg_idx.try_into().unwrap();
+ let callee_kwarg = unsafe { *(callee_kwargs.offset(kwarg_idx_isize)) };
+
+ // If the argument is already in the right order, then we don't
+ // need to generate any code since the expected value is already
+ // in the right place on the stack.
+ if callee_kwarg == caller_kwargs[kwarg_idx] {
+ continue;
+ }
+
+ // In this case the argument is not in the right place, so we
+ // need to find its position where it _should_ be and swap with
+ // that location.
+ for swap_idx in (kwarg_idx + 1)..total_kwargs {
+ if callee_kwarg == caller_kwargs[swap_idx] {
+ // First we're going to generate the code that is going
+ // to perform the actual swapping at runtime.
+ let swap_idx_i32: i32 = swap_idx.try_into().unwrap();
+ let kwarg_idx_i32: i32 = kwarg_idx.try_into().unwrap();
+ let offset0: u16 = (argc - 1 - swap_idx_i32 - args_before_kw)
+ .try_into()
+ .unwrap();
+ let offset1: u16 = (argc - 1 - kwarg_idx_i32 - args_before_kw)
+ .try_into()
+ .unwrap();
+ stack_swap(ctx, cb, offset0, offset1, REG1, REG0);
+
+ // Next we're going to do some bookkeeping on our end so
+ // that we know the order that the arguments are
+ // actually in now.
+ let tmp = caller_kwargs[kwarg_idx];
+ caller_kwargs[kwarg_idx] = caller_kwargs[swap_idx];
+ caller_kwargs[swap_idx] = tmp;
+
+ break;
+ }
+ }
+ }
+
+ // Keyword arguments cause a special extra local variable to be
+ // pushed onto the stack that represents the parameters that weren't
+ // explicitly given a value and have a non-constant default.
+ let unspec_opnd = uimm_opnd(VALUE::fixnum_from_usize(unspecified_bits).as_u64());
+ mov(cb, ctx.stack_opnd(-1), unspec_opnd);
+ }
+
+ // Points to the receiver operand on the stack
+ let recv = ctx.stack_opnd(argc);
+
+ // Store the updated SP on the current frame (pop arguments and receiver)
+ add_comment(cb, "store caller sp");
+ lea(
+ cb,
+ REG0,
+ ctx.sp_opnd((SIZEOF_VALUE as isize) * -((argc as isize) + 1)),
+ );
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG0);
+
+ // Store the next PC in the current frame
+ jit_save_pc(jit, cb, REG0);
+
+ if let Some(block_val) = block {
+ // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
+ // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases
+ // with cfp->block_code.
+ let gc_ptr = VALUE(block_val as usize);
+ jit_mov_gc_ptr(jit, cb, REG0, gc_ptr);
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BLOCK_CODE), REG0);
+ }
+
+ // Adjust the callee's stack pointer
+ let offs =
+ (SIZEOF_VALUE as isize) * (3 + (num_locals as isize) + if doing_kw_call { 1 } else { 0 });
+ lea(cb, REG0, ctx.sp_opnd(offs));
+
+ // Initialize local variables to Qnil
+ for i in 0..num_locals {
+ let offs = (SIZEOF_VALUE as i32) * (i - num_locals - 3);
+ mov(cb, mem_opnd(64, REG0, offs), uimm_opnd(Qnil.into()));
+ }
+
+ add_comment(cb, "push env");
+ // Put compile time cme into REG1. It's assumed to be valid because we are notified when
+ // any cme we depend on become outdated. See yjit_method_lookup_change().
+ jit_mov_gc_ptr(jit, cb, REG1, VALUE(cme as usize));
+ // Write method entry at sp[-3]
+ // sp[-3] = me;
+ mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
+
+ // Write block handler at sp[-2]
+ // sp[-2] = block_handler;
+ match block {
+ Some(_) => {
+ // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
+ lea(cb, REG1, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF));
+ or(cb, REG1, imm_opnd(1));
+ mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
+ }
+ None => {
+ mov(
+ cb,
+ mem_opnd(64, REG0, 8 * -2),
+ uimm_opnd(VM_BLOCK_HANDLER_NONE.into()),
+ );
+ }
+ }
+
+ // Write env flags at sp[-1]
+ // sp[-1] = frame_type;
+ let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL;
+ mov(cb, mem_opnd(64, REG0, 8 * -1), uimm_opnd(frame_type.into()));
+
+ add_comment(cb, "push callee CFP");
+ // Allocate a new CFP (ec->cfp--)
+ sub(cb, REG_CFP, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64));
+ mov(cb, mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP), REG_CFP);
+
+ // Setup the new frame
+ // *cfp = (const struct rb_control_frame_struct) {
+ // .pc = pc,
+ // .sp = sp,
+ // .iseq = iseq,
+ // .self = recv,
+ // .ep = sp - 1,
+ // .block_code = 0,
+ // .__bp__ = sp,
+ // };
+ mov(cb, REG1, recv);
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF), REG1);
+ mov(cb, REG_SP, REG0); // Switch to the callee's REG_SP
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG0);
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BP), REG0);
+ sub(cb, REG0, uimm_opnd(SIZEOF_VALUE as u64));
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP), REG0);
+ jit_mov_gc_ptr(jit, cb, REG0, VALUE(iseq as usize));
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_ISEQ), REG0);
+ mov(
+ cb,
+ mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BLOCK_CODE),
+ imm_opnd(0),
+ );
+
+ // No need to set cfp->pc since the callee sets it whenever calling into routines
+ // that could look at it through jit_save_pc().
+ // mov(cb, REG0, const_ptr_opnd(start_pc));
+ // mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), REG0);
+
+ // Stub so we can return to JITted code
+ let return_block = BlockId {
+ iseq: jit.iseq,
+ idx: jit_next_insn_idx(jit),
+ };
+
+ // Create a context for the callee
+ let mut callee_ctx = Context::new(); // Was DEFAULT_CTX
+
+ // Set the argument types in the callee's context
+ for arg_idx in 0..argc {
+ let stack_offs: u16 = (argc - arg_idx - 1).try_into().unwrap();
+ let arg_type = ctx.get_opnd_type(StackOpnd(stack_offs));
+ callee_ctx.set_local_type(arg_idx.try_into().unwrap(), arg_type);
+ }
+
+ let recv_type = ctx.get_opnd_type(StackOpnd(argc.try_into().unwrap()));
+ callee_ctx.upgrade_opnd_type(SelfOpnd, recv_type);
+
+ // The callee might change locals through Kernel#binding and other means.
+ ctx.clear_local_types();
+
+ // Pop arguments and receiver in return context, push the return value
+ // After the return, sp_offset will be 1. The codegen for leave writes
+ // the return value in case of JIT-to-JIT return.
+ let mut return_ctx = ctx.clone();
+ return_ctx.stack_pop((argc + 1).try_into().unwrap());
+ return_ctx.stack_push(Type::Unknown);
+ return_ctx.set_sp_offset(1);
+ return_ctx.reset_chain_depth();
+
+ // Write the JIT return address on the callee frame
+ gen_branch(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ return_block,
+ &return_ctx,
+ Some(return_block),
+ Some(&return_ctx),
+ gen_return_branch,
+ );
+
+ //print_str(cb, "calling Ruby func:");
+ //print_str(cb, rb_id2name(vm_ci_mid(ci)));
+
+ // Directly jump to the entry point of the callee
+ gen_direct_jump(
+ jit,
+ &callee_ctx,
+ BlockId {
+ iseq: iseq,
+ idx: start_pc_offset,
+ },
+ cb,
+ );
+
+ EndBlock
+}
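
For orientation, the "push env" section above writes three slots on top of the callee's nil-initialized locals: the method entry at sp[-3], the block handler at sp[-2], and the frame-type flags at sp[-1], with the new EP pointing at the flags slot. A minimal standalone sketch of that layout, assuming a plain `Vec<u64>` stands in for the VALUE stack (the helper name and signature are ours, not YJIT's):

```rust
// Sketch only: mirrors the memory layout the generated machine code produces.
fn push_env(sp: &mut Vec<u64>, num_locals: usize, qnil: u64, me: u64, block_handler: u64, frame_type: u64) {
    for _ in 0..num_locals {
        sp.push(qnil); // locals start out as nil
    }
    sp.push(me);            // sp[-3]: callable method entry
    sp.push(block_handler); // sp[-2]: block handler, or VM_BLOCK_HANDLER_NONE
    sp.push(frame_type);    // sp[-1]: frame type / env flags; EP ends up pointing here
}
```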
+
+fn gen_struct_aref(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ ci: *const rb_callinfo,
+ cme: *const rb_callable_method_entry_t,
+ comptime_recv: VALUE,
+ _comptime_recv_klass: VALUE,
+) -> CodegenStatus {
+ if unsafe { vm_ci_argc(ci) } != 0 {
+ return CantCompile;
+ }
+
+ let off: i32 = unsafe { get_cme_def_body_optimized_index(cme) }
+ .try_into()
+ .unwrap();
+
+ // Confidence checks
+ assert!(unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRUCT) });
+ assert!((off as i64) < unsafe { RSTRUCT_LEN(comptime_recv) });
+
+ // We are going to use an encoding that takes a 4-byte immediate which
+ // limits the offset to INT32_MAX.
+ {
+ let native_off = (off as i64) * (SIZEOF_VALUE as i64);
+ if native_off > (i32::MAX as i64) {
+ return CantCompile;
+ }
+ }
+
+    // All structs from the same Struct class should have the same
+    // length. So if our comptime_recv is embedded, all runtime
+    // structs of the same class should be as well, and vice versa.
+ let embedded = unsafe { FL_TEST_RAW(comptime_recv, VALUE(RSTRUCT_EMBED_LEN_MASK)) };
+
+ add_comment(cb, "struct aref");
+
+ let recv = ctx.stack_pop(1);
+
+ mov(cb, REG0, recv);
+
+ if embedded != VALUE(0) {
+ let ary_elt = mem_opnd(64, REG0, RUBY_OFFSET_RSTRUCT_AS_ARY + (8 * off));
+ mov(cb, REG0, ary_elt);
+ } else {
+ let rstruct_ptr = mem_opnd(64, REG0, RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR);
+ mov(cb, REG0, rstruct_ptr);
+ mov(cb, REG0, mem_opnd(64, REG0, (SIZEOF_VALUE as i32) * off));
+ }
+
+ let ret = ctx.stack_push(Type::Unknown);
+ mov(cb, ret, REG0);
+
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ EndBlock
+}
+
+fn gen_struct_aset(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ ci: *const rb_callinfo,
+ cme: *const rb_callable_method_entry_t,
+ comptime_recv: VALUE,
+ _comptime_recv_klass: VALUE,
+) -> CodegenStatus {
+ if unsafe { vm_ci_argc(ci) } != 1 {
+ return CantCompile;
+ }
+
+ let off: i32 = unsafe { get_cme_def_body_optimized_index(cme) }
+ .try_into()
+ .unwrap();
+
+ // Confidence checks
+ assert!(unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRUCT) });
+ assert!((off as i64) < unsafe { RSTRUCT_LEN(comptime_recv) });
+
+ add_comment(cb, "struct aset");
+
+ let val = ctx.stack_pop(1);
+ let recv = ctx.stack_pop(1);
+
+ mov(cb, C_ARG_REGS[0], recv);
+ mov(cb, C_ARG_REGS[1], imm_opnd(off as i64));
+ mov(cb, C_ARG_REGS[2], val);
+ call_ptr(cb, REG0, RSTRUCT_SET as *const u8);
+
+ let ret = ctx.stack_push(Type::Unknown);
+ mov(cb, ret, RAX);
+
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ EndBlock
+}
+
+fn gen_send_general(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ cd: *const rb_call_data,
+ block: Option<IseqPtr>,
+) -> CodegenStatus {
+ // Relevant definitions:
+ // rb_execution_context_t : vm_core.h
+ // invoker, cfunc logic : method.h, vm_method.c
+ // rb_callinfo : vm_callinfo.h
+ // rb_callable_method_entry_t : method.h
+ // vm_call_cfunc_with_frame : vm_insnhelper.c
+ //
+ // For a general overview for how the interpreter calls methods,
+ // see vm_call_method().
+
+ let ci = unsafe { get_call_data_ci(cd) }; // info about the call site
+ let argc = unsafe { vm_ci_argc(ci) };
+ let mid = unsafe { vm_ci_mid(ci) };
+ let flags = unsafe { vm_ci_flag(ci) };
+
+ // Don't JIT calls with keyword splat
+ if flags & VM_CALL_KW_SPLAT != 0 {
+ gen_counter_incr!(cb, send_kw_splat);
+ return CantCompile;
+ }
+
+ // Don't JIT calls that aren't simple
+ // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
+ if flags & VM_CALL_ARGS_SPLAT != 0 {
+ gen_counter_incr!(cb, send_args_splat);
+ return CantCompile;
+ }
+ if flags & VM_CALL_ARGS_BLOCKARG != 0 {
+ gen_counter_incr!(cb, send_block_arg);
+ return CantCompile;
+ }
+
+ // Defer compilation so we can specialize on class of receiver
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let comptime_recv = jit_peek_at_stack(jit, ctx, argc as isize);
+ let comptime_recv_klass = comptime_recv.class_of();
+
+ // Guard that the receiver has the same class as the one from compile time
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // Points to the receiver operand on the stack
+ let recv = ctx.stack_opnd(argc);
+ let recv_opnd = StackOpnd(argc.try_into().unwrap());
+ mov(cb, REG0, recv);
+ if !jit_guard_known_klass(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ comptime_recv_klass,
+ recv_opnd,
+ comptime_recv,
+ SEND_MAX_DEPTH,
+ side_exit,
+ ) {
+ return CantCompile;
+ }
+
+ // Do method lookup
+ let mut cme = unsafe { rb_callable_method_entry(comptime_recv_klass, mid) };
+ if cme.is_null() {
+ // TODO: counter
+ return CantCompile;
+ }
+
+ let visi = unsafe { METHOD_ENTRY_VISI(cme) };
+ match visi {
+ METHOD_VISI_PUBLIC => {
+ // Can always call public methods
+ }
+ METHOD_VISI_PRIVATE => {
+ if flags & VM_CALL_FCALL == 0 {
+ // Can only call private methods with FCALL callsites.
+                // (at the moment these are callsites with no receiver or with an explicit `self` receiver)
+ return CantCompile;
+ }
+ }
+ METHOD_VISI_PROTECTED => {
+ jit_protected_callee_ancestry_guard(jit, cb, ocb, cme, side_exit);
+ }
+ _ => {
+ panic!("cmes should always have a visibility!");
+ }
+ }
+
+ // Register block for invalidation
+ //assert!(cme->called_id == mid);
+ assume_method_lookup_stable(jit, ocb, comptime_recv_klass, cme);
+
+ // To handle the aliased method case (VM_METHOD_TYPE_ALIAS)
+ loop {
+ let def_type = unsafe { get_cme_def_type(cme) };
+ match def_type {
+ VM_METHOD_TYPE_ISEQ => {
+ return gen_send_iseq(jit, ctx, cb, ocb, ci, cme, block, argc);
+ }
+ VM_METHOD_TYPE_CFUNC => {
+ return gen_send_cfunc(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ ci,
+ cme,
+ block,
+ argc,
+ &comptime_recv_klass,
+ );
+ }
+ VM_METHOD_TYPE_IVAR => {
+ if argc != 0 {
+ // Argument count mismatch. Getters take no arguments.
+ gen_counter_incr!(cb, send_getter_arity);
+ return CantCompile;
+ }
+
+ if c_method_tracing_currently_enabled(jit) {
+ // Can't generate code for firing c_call and c_return events
+ // :attr-tracing:
+ // Handling the C method tracing events for attr_accessor
+ // methods is easier than regular C methods as we know the
+ // "method" we are calling into never enables those tracing
+ // events. Once global invalidation runs, the code for the
+ // attr_accessor is invalidated and we exit at the closest
+ // instruction boundary which is always outside of the body of
+ // the attr_accessor code.
+ gen_counter_incr!(cb, send_cfunc_tracing);
+ return CantCompile;
+ }
+
+ mov(cb, REG0, recv);
+ let ivar_name = unsafe { get_cme_def_body_attr_id(cme) };
+
+ return gen_get_ivar(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ SEND_MAX_DEPTH,
+ comptime_recv,
+ ivar_name,
+ recv_opnd,
+ side_exit,
+ );
+ }
+ VM_METHOD_TYPE_ATTRSET => {
+ if flags & VM_CALL_KWARG != 0 {
+ gen_counter_incr!(cb, send_attrset_kwargs);
+ return CantCompile;
+ } else if argc != 1 || unsafe { !RB_TYPE_P(comptime_recv, RUBY_T_OBJECT) } {
+ gen_counter_incr!(cb, send_ivar_set_method);
+ return CantCompile;
+ } else if c_method_tracing_currently_enabled(jit) {
+ // Can't generate code for firing c_call and c_return events
+ // See :attr-tracing:
+ gen_counter_incr!(cb, send_cfunc_tracing);
+ return CantCompile;
+ } else {
+ let ivar_name = unsafe { get_cme_def_body_attr_id(cme) };
+ return gen_set_ivar(jit, ctx, cb, comptime_recv, ivar_name);
+ }
+ }
+ // Block method, e.g. define_method(:foo) { :my_block }
+ VM_METHOD_TYPE_BMETHOD => {
+ gen_counter_incr!(cb, send_bmethod);
+ return CantCompile;
+ }
+ VM_METHOD_TYPE_ZSUPER => {
+ gen_counter_incr!(cb, send_zsuper_method);
+ return CantCompile;
+ }
+ VM_METHOD_TYPE_ALIAS => {
+ // Retrieve the aliased method and re-enter the switch
+ cme = unsafe { rb_aliased_callable_method_entry(cme) };
+ continue;
+ }
+ VM_METHOD_TYPE_UNDEF => {
+ gen_counter_incr!(cb, send_undef_method);
+ return CantCompile;
+ }
+ VM_METHOD_TYPE_NOTIMPLEMENTED => {
+ gen_counter_incr!(cb, send_not_implemented_method);
+ return CantCompile;
+ }
+ // Send family of methods, e.g. call/apply
+ VM_METHOD_TYPE_OPTIMIZED => {
+ let opt_type = unsafe { get_cme_def_body_optimized_type(cme) };
+ match opt_type {
+ OPTIMIZED_METHOD_TYPE_SEND => {
+ gen_counter_incr!(cb, send_optimized_method_send);
+ return CantCompile;
+ }
+ OPTIMIZED_METHOD_TYPE_CALL => {
+ gen_counter_incr!(cb, send_optimized_method_call);
+ return CantCompile;
+ }
+ OPTIMIZED_METHOD_TYPE_BLOCK_CALL => {
+ gen_counter_incr!(cb, send_optimized_method_block_call);
+ return CantCompile;
+ }
+ OPTIMIZED_METHOD_TYPE_STRUCT_AREF => {
+ return gen_struct_aref(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ ci,
+ cme,
+ comptime_recv,
+ comptime_recv_klass,
+ );
+ }
+ OPTIMIZED_METHOD_TYPE_STRUCT_ASET => {
+ return gen_struct_aset(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ ci,
+ cme,
+ comptime_recv,
+ comptime_recv_klass,
+ );
+ }
+ _ => {
+ panic!("unknown optimized method type!")
+ }
+ }
+ }
+ VM_METHOD_TYPE_MISSING => {
+ gen_counter_incr!(cb, send_missing_method);
+ return CantCompile;
+ }
+ VM_METHOD_TYPE_REFINED => {
+ gen_counter_incr!(cb, send_refined_method);
+ return CantCompile;
+ }
+ _ => {
+ unreachable!();
+ }
+ }
+ }
+}
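
The `loop`/`match` above exists only so that `VM_METHOD_TYPE_ALIAS` can swap in the aliased method entry and take another pass over the same dispatch; every other arm returns. A stripped-down sketch of that control-flow pattern, with made-up types standing in for the CRuby ones:

```rust
// Sketch only: the alias-resolution pattern used by gen_send_general above.
enum MethodDef {
    Iseq,
    Cfunc,
    Alias(Box<MethodDef>),
    Unsupported,
}

fn dispatch(mut def: &MethodDef) -> &'static str {
    loop {
        match def {
            MethodDef::Iseq => return "compile an iseq call",
            MethodDef::Cfunc => return "compile a cfunc call",
            MethodDef::Alias(target) => {
                // Like rb_aliased_callable_method_entry() followed by `continue`.
                def = &**target;
                continue;
            }
            MethodDef::Unsupported => return "CantCompile",
        }
    }
}
```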
+
+fn gen_opt_send_without_block(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let cd = jit_get_arg(jit, 0).as_ptr();
+
+ gen_send_general(jit, ctx, cb, ocb, cd, None)
+}
+
+fn gen_send(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let cd = jit_get_arg(jit, 0).as_ptr();
+ let block = jit_get_arg(jit, 1).as_optional_ptr();
+ return gen_send_general(jit, ctx, cb, ocb, cd, block);
+}
+
+fn gen_invokesuper(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let cd: *const rb_call_data = jit_get_arg(jit, 0).as_ptr();
+ let block: Option<IseqPtr> = jit_get_arg(jit, 1).as_optional_ptr();
+
+ // Defer compilation so we can specialize on class of receiver
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let me = unsafe { rb_vm_frame_method_entry(get_ec_cfp(jit.ec.unwrap())) };
+ if me.is_null() {
+ return CantCompile;
+ }
+
+ // FIXME: We should track and invalidate this block when this cme is invalidated
+ let current_defined_class = unsafe { (*me).defined_class };
+ let mid = unsafe { get_def_original_id((*me).def) };
+
+ if me != unsafe { rb_callable_method_entry(current_defined_class, (*me).called_id) } {
+        // We could likely generate this call, since we are only concerned with the
+        // method entry remaining valid, but assume_method_lookup_stable() below
+        // requires that the method lookup itself matches as well.
+ return CantCompile;
+ }
+
+ // vm_search_normal_superclass
+ let rbasic_ptr: *const RBasic = current_defined_class.as_ptr();
+ if current_defined_class.builtin_type() == RUBY_T_ICLASS
+ && unsafe { FL_TEST_RAW((*rbasic_ptr).klass, VALUE(RMODULE_IS_REFINEMENT)) != VALUE(0) }
+ {
+ return CantCompile;
+ }
+ let comptime_superclass =
+ unsafe { rb_class_get_superclass(RCLASS_ORIGIN(current_defined_class)) };
+
+ let ci = unsafe { get_call_data_ci(cd) };
+ let argc = unsafe { vm_ci_argc(ci) };
+
+ let ci_flags = unsafe { vm_ci_flag(ci) };
+
+ // Don't JIT calls that aren't simple
+ // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
+ if ci_flags & VM_CALL_ARGS_SPLAT != 0 {
+ gen_counter_incr!(cb, send_args_splat);
+ return CantCompile;
+ }
+ if ci_flags & VM_CALL_KWARG != 0 {
+ gen_counter_incr!(cb, send_keywords);
+ return CantCompile;
+ }
+ if ci_flags & VM_CALL_KW_SPLAT != 0 {
+ gen_counter_incr!(cb, send_kw_splat);
+ return CantCompile;
+ }
+ if ci_flags & VM_CALL_ARGS_BLOCKARG != 0 {
+ gen_counter_incr!(cb, send_block_arg);
+ return CantCompile;
+ }
+
+ // Ensure we haven't rebound this method onto an incompatible class.
+ // In the interpreter we try to avoid making this check by performing some
+ // cheaper calculations first, but since we specialize on the method entry
+    // and therefore only have to do this once at compile time, it is fine to
+    // always check and side exit.
+ let comptime_recv = jit_peek_at_stack(jit, ctx, argc as isize);
+ if unsafe { rb_obj_is_kind_of(comptime_recv, current_defined_class) } == VALUE(0) {
+ return CantCompile;
+ }
+
+ // Do method lookup
+ let cme = unsafe { rb_callable_method_entry(comptime_superclass, mid) };
+
+ if cme.is_null() {
+ return CantCompile;
+ }
+
+ // Check that we'll be able to write this method dispatch before generating checks
+ let cme_def_type = unsafe { get_cme_def_type(cme) };
+ if cme_def_type != VM_METHOD_TYPE_ISEQ && cme_def_type != VM_METHOD_TYPE_CFUNC {
+ // others unimplemented
+ return CantCompile;
+ }
+
+ // Guard that the receiver has the same class as the one from compile time
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ let cfp = unsafe { get_ec_cfp(jit.ec.unwrap()) };
+ let ep = unsafe { get_cfp_ep(cfp) };
+ let cref_me = unsafe { *ep.offset(VM_ENV_DATA_INDEX_ME_CREF.try_into().unwrap()) };
+ let me_as_value = VALUE(me as usize);
+ if cref_me != me_as_value {
+ // This will be the case for super within a block
+ return CantCompile;
+ }
+
+ add_comment(cb, "guard known me");
+ mov(cb, REG0, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP));
+ let ep_me_opnd = mem_opnd(
+ 64,
+ REG0,
+ (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_ME_CREF as i32),
+ );
+ jit_mov_gc_ptr(jit, cb, REG1, me_as_value);
+ cmp(cb, ep_me_opnd, REG1);
+ jne_ptr(cb, counted_exit!(ocb, side_exit, invokesuper_me_changed));
+
+ if block.is_none() {
+ // Guard no block passed
+ // rb_vm_frame_block_handler(GET_EC()->cfp) == VM_BLOCK_HANDLER_NONE
+ // note, we assume VM_ASSERT(VM_ENV_LOCAL_P(ep))
+ //
+ // TODO: this could properly forward the current block handler, but
+ // would require changes to gen_send_*
+ add_comment(cb, "guard no block given");
+ // EP is in REG0 from above
+ let ep_specval_opnd = mem_opnd(
+ 64,
+ REG0,
+ (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32),
+ );
+ cmp(cb, ep_specval_opnd, uimm_opnd(VM_BLOCK_HANDLER_NONE.into()));
+ jne_ptr(cb, counted_exit!(ocb, side_exit, invokesuper_block));
+ }
+
+ // Points to the receiver operand on the stack
+ let recv = ctx.stack_opnd(argc);
+ mov(cb, REG0, recv);
+
+ // We need to assume that both our current method entry and the super
+ // method entry we invoke remain stable
+ assume_method_lookup_stable(jit, ocb, current_defined_class, me);
+ assume_method_lookup_stable(jit, ocb, comptime_superclass, cme);
+
+ // Method calls may corrupt types
+ ctx.clear_local_types();
+
+ match cme_def_type {
+ VM_METHOD_TYPE_ISEQ => gen_send_iseq(jit, ctx, cb, ocb, ci, cme, block, argc),
+ VM_METHOD_TYPE_CFUNC => {
+ gen_send_cfunc(jit, ctx, cb, ocb, ci, cme, block, argc, ptr::null())
+ }
+ _ => unreachable!(),
+ }
+}
+
+fn gen_leave(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Only the return value should be on the stack
+ assert!(ctx.get_stack_size() == 1);
+
+ // Create a side-exit to fall back to the interpreter
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // Load environment pointer EP from CFP
+ mov(cb, REG1, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP));
+
+ // Check for interrupts
+ add_comment(cb, "check for interrupts");
+ gen_check_ints(cb, counted_exit!(ocb, side_exit, leave_se_interrupt));
+
+ // Load the return value
+ mov(cb, REG0, ctx.stack_pop(1));
+
+ // Pop the current frame (ec->cfp++)
+ // Note: the return PC is already in the previous CFP
+ add_comment(cb, "pop stack frame");
+ add(cb, REG_CFP, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64));
+ mov(cb, mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP), REG_CFP);
+
+ // Reload REG_SP for the caller and write the return value.
+ // Top of the stack is REG_SP[0] since the caller has sp_offset=1.
+ mov(cb, REG_SP, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP));
+ mov(cb, mem_opnd(64, REG_SP, 0), REG0);
+
+ // Jump to the JIT return address on the frame that was just popped
+ let offset_to_jit_return =
+ -(RUBY_SIZEOF_CONTROL_FRAME as i32) + (RUBY_OFFSET_CFP_JIT_RETURN as i32);
+ jmp_rm(cb, mem_opnd(64, REG_CFP, offset_to_jit_return));
+
+ EndBlock
+}
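
One easy-to-miss detail above: by the time we jump through `jit_return`, REG_CFP has already been incremented past the frame being left, so the just-popped frame sits at a negative displacement from the new CFP. A sketch of that displacement arithmetic with made-up sizes (the real constants come from the C structs via bindgen):

```rust
// Hypothetical sizes purely for illustration; not the real CRuby values.
const SIZEOF_CONTROL_FRAME: i32 = 0x40;
const OFFSET_CFP_JIT_RETURN: i32 = 0x38;

fn offset_to_jit_return() -> i32 {
    // After `add REG_CFP, sizeof(control frame)`, the popped frame lives at
    // REG_CFP - sizeof(control frame), so its jit_return field is at this offset.
    -SIZEOF_CONTROL_FRAME + OFFSET_CFP_JIT_RETURN
}
```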
+
+fn gen_getglobal(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let gid = jit_get_arg(jit, 0);
+
+ // Save the PC and SP because we might make a Ruby call for warning
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ mov(cb, C_ARG_REGS[0], imm_opnd(gid.as_i64()));
+
+ call_ptr(cb, REG0, rb_gvar_get as *const u8);
+
+ let top = ctx.stack_push(Type::Unknown);
+ mov(cb, top, RAX);
+
+ KeepCompiling
+}
+
+fn gen_setglobal(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let gid = jit_get_arg(jit, 0);
+
+ // Save the PC and SP because we might make a Ruby call for
+ // Kernel#set_trace_var
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ mov(cb, C_ARG_REGS[0], imm_opnd(gid.as_i64()));
+
+ let val = ctx.stack_pop(1);
+
+ mov(cb, C_ARG_REGS[1], val);
+
+ call_ptr(cb, REG0, rb_gvar_set as *const u8);
+
+ KeepCompiling
+}
+
+fn gen_anytostring(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Save the PC and SP because we might make a Ruby call for
+ // Kernel#set_trace_var
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ let str = ctx.stack_pop(1);
+ let val = ctx.stack_pop(1);
+
+ mov(cb, C_ARG_REGS[0], str);
+ mov(cb, C_ARG_REGS[1], val);
+
+ call_ptr(cb, REG0, rb_obj_as_string_result as *const u8);
+
+ // Push the return value
+ let stack_ret = ctx.stack_push(Type::String);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+fn gen_objtostring(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let recv = ctx.stack_opnd(0);
+ let comptime_recv = jit_peek_at_stack(jit, ctx, 0);
+
+ if unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRING) } {
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ mov(cb, REG0, recv);
+ jit_guard_known_klass(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ comptime_recv.class_of(),
+ StackOpnd(0),
+ comptime_recv,
+ SEND_MAX_DEPTH,
+ side_exit,
+ );
+ // No work needed. The string value is already on the top of the stack.
+ KeepCompiling
+ } else {
+ let cd = jit_get_arg(jit, 0).as_ptr();
+ gen_send_general(jit, ctx, cb, ocb, cd, None)
+ }
+}
+
+fn gen_intern(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Save the PC and SP because we might allocate
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ let str = ctx.stack_pop(1);
+
+ mov(cb, C_ARG_REGS[0], str);
+
+ call_ptr(cb, REG0, rb_str_intern as *const u8);
+
+ // Push the return value
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+fn gen_toregexp(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let opt = jit_get_arg(jit, 0).as_i64();
+ let cnt = jit_get_arg(jit, 1).as_usize();
+
+ // Save the PC and SP because this allocates an object and could
+ // raise an exception.
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ let values_ptr = ctx.sp_opnd(-((SIZEOF_VALUE as isize) * (cnt as isize)));
+ ctx.stack_pop(cnt);
+
+ mov(cb, C_ARG_REGS[0], imm_opnd(0));
+ mov(cb, C_ARG_REGS[1], imm_opnd(cnt.try_into().unwrap()));
+ lea(cb, C_ARG_REGS[2], values_ptr);
+ call_ptr(cb, REG0, rb_ary_tmp_new_from_values as *const u8);
+
+ // Save the array so we can clear it later
+ push(cb, RAX);
+ push(cb, RAX); // Alignment
+ mov(cb, C_ARG_REGS[0], RAX);
+ mov(cb, C_ARG_REGS[1], imm_opnd(opt));
+ call_ptr(cb, REG0, rb_reg_new_ary as *const u8);
+
+ // The actual regex is in RAX now. Pop the temp array from
+ // rb_ary_tmp_new_from_values into C arg regs so we can clear it
+ pop(cb, REG1); // Alignment
+ pop(cb, C_ARG_REGS[0]);
+
+ // The value we want to push on the stack is in RAX right now
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ // Clear the temp array.
+ call_ptr(cb, REG0, rb_ary_clear as *const u8);
+
+ KeepCompiling
+}
+
+fn gen_getspecial(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // This takes two arguments, key and type
+ // key is only used when type == 0
+ // A non-zero type determines which type of backref to fetch
+ //rb_num_t key = jit_get_arg(jit, 0);
+ let rtype = jit_get_arg(jit, 1).as_u64();
+
+ if rtype == 0 {
+ // not yet implemented
+ return CantCompile;
+ } else if rtype & 0x01 != 0 {
+ // Fetch a "special" backref based on a char encoded by shifting by 1
+
+ // Can raise if matchdata uninitialized
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // call rb_backref_get()
+ add_comment(cb, "rb_backref_get");
+ call_ptr(cb, REG0, rb_backref_get as *const u8);
+ mov(cb, C_ARG_REGS[0], RAX);
+
+ let rt_u8: u8 = (rtype >> 1).try_into().unwrap();
+ match rt_u8.into() {
+ '&' => {
+ add_comment(cb, "rb_reg_last_match");
+ call_ptr(cb, REG0, rb_reg_last_match as *const u8);
+ }
+ '`' => {
+ add_comment(cb, "rb_reg_match_pre");
+ call_ptr(cb, REG0, rb_reg_match_pre as *const u8);
+ }
+ '\'' => {
+ add_comment(cb, "rb_reg_match_post");
+ call_ptr(cb, REG0, rb_reg_match_post as *const u8);
+ }
+ '+' => {
+ add_comment(cb, "rb_reg_match_last");
+ call_ptr(cb, REG0, rb_reg_match_last as *const u8);
+ }
+ _ => panic!("invalid back-ref"),
+ }
+
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+ } else {
+ // Fetch the N-th match from the last backref based on type shifted by 1
+
+ // Can raise if matchdata uninitialized
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // call rb_backref_get()
+ add_comment(cb, "rb_backref_get");
+ call_ptr(cb, REG0, rb_backref_get as *const u8);
+
+ // rb_reg_nth_match((int)(type >> 1), backref);
+ add_comment(cb, "rb_reg_nth_match");
+ mov(
+ cb,
+ C_ARG_REGS[0],
+ imm_opnd((rtype >> 1).try_into().unwrap()),
+ );
+ mov(cb, C_ARG_REGS[1], RAX);
+ call_ptr(cb, REG0, rb_reg_nth_match as *const u8);
+
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+ }
+}
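
The operand decoding above is compact: `rtype == 0` means a plain key lookup (not compiled yet), an odd `rtype` encodes one of the special backref characters shifted left by one, and an even non-zero `rtype` encodes the match group number shifted left by one. A standalone sketch of that decoding (the enum and names are ours, not CRuby's):

```rust
// Sketch only: decodes the second getspecial operand the way the branches above do.
enum Backref {
    Key,           // rtype == 0: plain key lookup
    Special(char), // $&, $`, $', $+ -> encoded as (char << 1) | 1
    Nth(u64),      // $1, $2, ...    -> encoded as (n << 1)
}

fn decode_backref(rtype: u64) -> Backref {
    if rtype == 0 {
        Backref::Key
    } else if rtype & 1 != 0 {
        Backref::Special((rtype >> 1) as u8 as char)
    } else {
        Backref::Nth(rtype >> 1)
    }
}
```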
+
+fn gen_getclassvariable(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // rb_vm_getclassvariable can raise exceptions.
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ let cfp_iseq_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_ISEQ);
+ mov(cb, C_ARG_REGS[0], cfp_iseq_opnd);
+ mov(cb, C_ARG_REGS[1], REG_CFP);
+ mov(cb, C_ARG_REGS[2], uimm_opnd(jit_get_arg(jit, 0).as_u64()));
+ mov(cb, C_ARG_REGS[3], uimm_opnd(jit_get_arg(jit, 1).as_u64()));
+
+ call_ptr(cb, REG0, rb_vm_getclassvariable as *const u8);
+
+ let stack_top = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_top, RAX);
+
+ KeepCompiling
+}
+
+fn gen_setclassvariable(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // rb_vm_setclassvariable can raise exceptions.
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ let cfp_iseq_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_ISEQ);
+ mov(cb, C_ARG_REGS[0], cfp_iseq_opnd);
+ mov(cb, C_ARG_REGS[1], REG_CFP);
+ mov(cb, C_ARG_REGS[2], uimm_opnd(jit_get_arg(jit, 0).as_u64()));
+ mov(cb, C_ARG_REGS[3], ctx.stack_pop(1));
+ mov(cb, C_ARG_REGS[4], uimm_opnd(jit_get_arg(jit, 1).as_u64()));
+
+ call_ptr(cb, REG0, rb_vm_setclassvariable as *const u8);
+
+ KeepCompiling
+}
+
+fn gen_opt_getinlinecache(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let jump_offset = jit_get_arg(jit, 0);
+ let const_cache_as_value = jit_get_arg(jit, 1);
+ let ic: *const iseq_inline_constant_cache = const_cache_as_value.as_ptr();
+
+ // See vm_ic_hit_p(). The same conditions are checked in yjit_constant_ic_update().
+ let ice = unsafe { (*ic).entry };
+ if ice.is_null() {
+ // In this case, leave a block that unconditionally side exits
+ // for the interpreter to invalidate.
+ return CantCompile;
+ }
+
+ // Make sure there is an exit for this block as the interpreter might want
+ // to invalidate this block from yjit_constant_ic_update().
+ jit_ensure_block_entry_exit(jit, ocb);
+
+ if !unsafe { (*ice).ic_cref }.is_null() {
+ // Cache is keyed on a certain lexical scope. Use the interpreter's cache.
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // Call function to verify the cache. It doesn't allocate or call methods.
+ mov(cb, C_ARG_REGS[0], const_ptr_opnd(ic as *const u8));
+ mov(cb, C_ARG_REGS[1], mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP));
+ call_ptr(cb, REG0, rb_vm_ic_hit_p as *const u8);
+
+ // Check the result. _Bool is one byte in SysV.
+ test(cb, AL, AL);
+ jz_ptr(cb, counted_exit!(ocb, side_exit, opt_getinlinecache_miss));
+
+ // Push ic->entry->value
+ mov(cb, REG0, const_ptr_opnd(ic as *mut u8));
+ mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_IC_ENTRY));
+ let stack_top = ctx.stack_push(Type::Unknown);
+ mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_ICE_VALUE));
+ mov(cb, stack_top, REG0);
+ } else {
+ // Optimize for single ractor mode.
+ // FIXME: This leaks when st_insert raises NoMemoryError
+ if !assume_single_ractor_mode(jit, ocb) {
+ return CantCompile;
+ }
+
+ // Invalidate output code on any constant writes associated with
+ // constants referenced within the current block.
+ assume_stable_constant_names(jit, ocb);
+
+ jit_putobject(jit, ctx, cb, unsafe { (*ice).value });
+ }
+
+ // Jump over the code for filling the cache
+ let jump_idx = jit_next_insn_idx(jit) + jump_offset.as_u32();
+ gen_direct_jump(
+ jit,
+ ctx,
+ BlockId {
+ iseq: jit.iseq,
+ idx: jump_idx,
+ },
+ cb,
+ );
+ EndBlock
+}
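
To summarize the three compile-time outcomes above in one place (a sketch in our own words; the enum is ours, not an API):

```rust
// The three inline-cache states gen_opt_getinlinecache distinguishes at compile time.
enum ConstCacheState {
    Empty,      // ic->entry is NULL
    CrefScoped, // ic->entry->ic_cref is set (lexically scoped constant)
    Plain,      // cached value with no cref
}

fn strategy(state: ConstCacheState) -> &'static str {
    match state {
        ConstCacheState::Empty => "CantCompile: let the interpreter fill the cache first",
        ConstCacheState::CrefScoped => "emit a runtime rb_vm_ic_hit_p() check and side-exit on a miss",
        ConstCacheState::Plain => "assume single-ractor mode and stable constant names, then bake the value in",
    }
}
```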
+
+// Push the explicit block parameter onto the temporary stack. Part of the
+// interpreter's scheme for avoiding Proc allocations when delegating
+// explicit block parameters.
+fn gen_getblockparamproxy(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+    // This mirrors the interpreter code, checking for the case
+    // where it pushes rb_block_param_proxy.
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // EP level
+ let level = jit_get_arg(jit, 1).as_u32();
+
+ // Load environment pointer EP from CFP
+ gen_get_ep(cb, REG0, level);
+
+ // Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero
+ let flag_check = mem_opnd(
+ 64,
+ REG0,
+ (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_FLAGS as i32),
+ );
+ test(
+ cb,
+ flag_check,
+ uimm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()),
+ );
+ jnz_ptr(cb, counted_exit!(ocb, side_exit, gbpp_block_param_modified));
+
+ // Load the block handler for the current frame
+ // note, VM_ASSERT(VM_ENV_LOCAL_P(ep))
+ mov(
+ cb,
+ REG0,
+ mem_opnd(
+ 64,
+ REG0,
+ (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32),
+ ),
+ );
+
+ // Block handler is a tagged pointer. Look at the tag. 0x03 is from VM_BH_ISEQ_BLOCK_P().
+ and(cb, REG0_8, imm_opnd(0x3));
+
+ // Bail unless VM_BH_ISEQ_BLOCK_P(bh). This also checks for null.
+ cmp(cb, REG0_8, imm_opnd(0x1));
+ jnz_ptr(
+ cb,
+ counted_exit!(ocb, side_exit, gbpp_block_handler_not_iseq),
+ );
+
+ // Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr.
+ mov(
+ cb,
+ REG0,
+ const_ptr_opnd(unsafe { rb_block_param_proxy }.as_ptr()),
+ );
+ assert!(!unsafe { rb_block_param_proxy }.special_const_p());
+ let top = ctx.stack_push(Type::UnknownHeap);
+ mov(cb, top, REG0);
+
+ KeepCompiling
+}
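
The mask-with-0x3, compare-with-0x1 pair above implements the VM_BH_ISEQ_BLOCK_P() check on the block handler, which is a tagged pointer; the comparison also rejects a null block handler. As plain Rust, the predicate the generated code evaluates is roughly (a sketch, not the CRuby macro):

```rust
// Block handlers are tagged pointers; iseq blocks carry the tag bits 0b01.
fn is_iseq_block_handler(block_handler: usize) -> bool {
    // A null block handler (no block) fails this check as well.
    block_handler & 0x3 == 0x1
}
```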
+
+fn gen_invokebuiltin(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let bf: *const rb_builtin_function = jit_get_arg(jit, 0).as_ptr();
+ let bf_argc: usize = unsafe { (*bf).argc }.try_into().expect("non negative argc");
+
+ // ec, self, and arguments
+ if bf_argc + 2 > C_ARG_REGS.len() {
+ return CantCompile;
+ }
+
+ // If the calls don't allocate, do they need up to date PC, SP?
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // Call the builtin func (ec, recv, arg1, arg2, ...)
+ mov(cb, C_ARG_REGS[0], REG_EC);
+ mov(
+ cb,
+ C_ARG_REGS[1],
+ mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF),
+ );
+
+ // Copy arguments from locals
+ for i in 0..bf_argc {
+ let stack_opnd = ctx.stack_opnd((bf_argc - i - 1) as i32);
+ let c_arg_reg = C_ARG_REGS[2 + i];
+ mov(cb, c_arg_reg, stack_opnd);
+ }
+
+ call_ptr(cb, REG0, unsafe { (*bf).func_ptr } as *const u8);
+
+ // Push the return value
+ ctx.stack_pop(bf_argc);
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+// opt_invokebuiltin_delegate calls a builtin function, like
+// invokebuiltin does, but instead of taking arguments from the top of the
+// stack, it uses the argument locals (and self) of the current method.
+fn gen_opt_invokebuiltin_delegate(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let bf: *const rb_builtin_function = jit_get_arg(jit, 0).as_ptr();
+ let bf_argc = unsafe { (*bf).argc };
+ let start_index = jit_get_arg(jit, 1).as_i32();
+
+ // ec, self, and arguments
+ if bf_argc + 2 > (C_ARG_REGS.len() as i32) {
+ return CantCompile;
+ }
+
+ // If the calls don't allocate, do they need up to date PC, SP?
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ if bf_argc > 0 {
+ // Load environment pointer EP from CFP
+ mov(cb, REG0, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP));
+ }
+
+ // Call the builtin func (ec, recv, arg1, arg2, ...)
+ mov(cb, C_ARG_REGS[0], REG_EC);
+ mov(
+ cb,
+ C_ARG_REGS[1],
+ mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF),
+ );
+
+ // Copy arguments from locals
+ for i in 0..bf_argc {
+ let table_size = unsafe { get_iseq_body_local_table_size(jit.iseq) };
+ let offs: i32 = -(table_size as i32) - (VM_ENV_DATA_SIZE as i32) + 1 + start_index + i;
+ let local_opnd = mem_opnd(64, REG0, offs * (SIZEOF_VALUE as i32));
+ let offs: usize = (i + 2) as usize;
+ let c_arg_reg = C_ARG_REGS[offs];
+ mov(cb, c_arg_reg, local_opnd);
+ }
+ call_ptr(cb, REG0, unsafe { (*bf).func_ptr } as *const u8);
+
+ // Push the return value
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+/// Maps a YARV opcode to a code generation function (if supported)
+fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> {
+ let VALUE(opcode) = opcode;
+ assert!(opcode < VM_INSTRUCTION_SIZE);
+
+ match opcode {
+ OP_NOP => Some(gen_nop),
+ OP_POP => Some(gen_pop),
+ OP_DUP => Some(gen_dup),
+ OP_DUPN => Some(gen_dupn),
+ OP_SWAP => Some(gen_swap),
+ OP_PUTNIL => Some(gen_putnil),
+ OP_PUTOBJECT => Some(gen_putobject),
+ OP_PUTOBJECT_INT2FIX_0_ => Some(gen_putobject_int2fix),
+ OP_PUTOBJECT_INT2FIX_1_ => Some(gen_putobject_int2fix),
+ OP_PUTSELF => Some(gen_putself),
+ OP_PUTSPECIALOBJECT => Some(gen_putspecialobject),
+ OP_SETN => Some(gen_setn),
+ OP_TOPN => Some(gen_topn),
+ OP_ADJUSTSTACK => Some(gen_adjuststack),
+ OP_GETLOCAL => Some(gen_getlocal),
+ OP_GETLOCAL_WC_0 => Some(gen_getlocal_wc0),
+ OP_GETLOCAL_WC_1 => Some(gen_getlocal_wc1),
+ OP_SETLOCAL => Some(gen_setlocal),
+ OP_SETLOCAL_WC_0 => Some(gen_setlocal_wc0),
+ OP_SETLOCAL_WC_1 => Some(gen_setlocal_wc1),
+ OP_OPT_PLUS => Some(gen_opt_plus),
+ OP_OPT_MINUS => Some(gen_opt_minus),
+ OP_OPT_AND => Some(gen_opt_and),
+ OP_OPT_OR => Some(gen_opt_or),
+ OP_NEWHASH => Some(gen_newhash),
+ OP_DUPHASH => Some(gen_duphash),
+ OP_NEWARRAY => Some(gen_newarray),
+ OP_DUPARRAY => Some(gen_duparray),
+ OP_CHECKTYPE => Some(gen_checktype),
+ OP_OPT_LT => Some(gen_opt_lt),
+ OP_OPT_LE => Some(gen_opt_le),
+ OP_OPT_GT => Some(gen_opt_gt),
+ OP_OPT_GE => Some(gen_opt_ge),
+ OP_OPT_MOD => Some(gen_opt_mod),
+ OP_OPT_STR_FREEZE => Some(gen_opt_str_freeze),
+ OP_OPT_STR_UMINUS => Some(gen_opt_str_uminus),
+ OP_SPLATARRAY => Some(gen_splatarray),
+ OP_NEWRANGE => Some(gen_newrange),
+ OP_PUTSTRING => Some(gen_putstring),
+ OP_EXPANDARRAY => Some(gen_expandarray),
+ OP_DEFINED => Some(gen_defined),
+ OP_CHECKKEYWORD => Some(gen_checkkeyword),
+ OP_CONCATSTRINGS => Some(gen_concatstrings),
+ OP_GETINSTANCEVARIABLE => Some(gen_getinstancevariable),
+ OP_SETINSTANCEVARIABLE => Some(gen_setinstancevariable),
+
+ OP_OPT_EQ => Some(gen_opt_eq),
+ OP_OPT_NEQ => Some(gen_opt_neq),
+ OP_OPT_AREF => Some(gen_opt_aref),
+ OP_OPT_ASET => Some(gen_opt_aset),
+ OP_OPT_MULT => Some(gen_opt_mult),
+ OP_OPT_DIV => Some(gen_opt_div),
+ OP_OPT_LTLT => Some(gen_opt_ltlt),
+ OP_OPT_NIL_P => Some(gen_opt_nil_p),
+ OP_OPT_EMPTY_P => Some(gen_opt_empty_p),
+ OP_OPT_NOT => Some(gen_opt_not),
+ OP_OPT_SIZE => Some(gen_opt_size),
+ OP_OPT_LENGTH => Some(gen_opt_length),
+ OP_OPT_REGEXPMATCH2 => Some(gen_opt_regexpmatch2),
+ OP_OPT_GETINLINECACHE => Some(gen_opt_getinlinecache),
+ OP_INVOKEBUILTIN => Some(gen_invokebuiltin),
+ OP_OPT_INVOKEBUILTIN_DELEGATE => Some(gen_opt_invokebuiltin_delegate),
+ OP_OPT_INVOKEBUILTIN_DELEGATE_LEAVE => Some(gen_opt_invokebuiltin_delegate),
+ OP_OPT_CASE_DISPATCH => Some(gen_opt_case_dispatch),
+ OP_BRANCHIF => Some(gen_branchif),
+ OP_BRANCHUNLESS => Some(gen_branchunless),
+ OP_BRANCHNIL => Some(gen_branchnil),
+ OP_JUMP => Some(gen_jump),
+
+ OP_GETBLOCKPARAMPROXY => Some(gen_getblockparamproxy),
+ OP_OPT_SEND_WITHOUT_BLOCK => Some(gen_opt_send_without_block),
+ OP_SEND => Some(gen_send),
+ OP_INVOKESUPER => Some(gen_invokesuper),
+ OP_LEAVE => Some(gen_leave),
+
+ OP_GETGLOBAL => Some(gen_getglobal),
+ OP_SETGLOBAL => Some(gen_setglobal),
+ OP_ANYTOSTRING => Some(gen_anytostring),
+ OP_OBJTOSTRING => Some(gen_objtostring),
+ OP_INTERN => Some(gen_intern),
+ OP_TOREGEXP => Some(gen_toregexp),
+ OP_GETSPECIAL => Some(gen_getspecial),
+ OP_GETCLASSVARIABLE => Some(gen_getclassvariable),
+ OP_SETCLASSVARIABLE => Some(gen_setclassvariable),
+
+ // Unimplemented opcode, YJIT won't generate code for this yet
+ _ => None,
+ }
+}
+
+// Return true when the codegen function generates code.
+// known_recv_class is non-NULL when the caller has used jit_guard_known_klass().
+// See yjit_reg_method().
+type MethodGenFn = fn(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ ci: *const rb_callinfo,
+ cme: *const rb_callable_method_entry_t,
+ block: Option<IseqPtr>,
+ argc: i32,
+ known_recv_class: *const VALUE,
+) -> bool;
+
+/// Global state needed for code generation
+pub struct CodegenGlobals {
+ /// Inline code block (fast path)
+ inline_cb: CodeBlock,
+
+ /// Outlined code block (slow path)
+ outlined_cb: OutlinedCb,
+
+ /// Code for exiting back to the interpreter from the leave instruction
+ leave_exit_code: CodePtr,
+
+    // For exiting from a YJIT frame from branch_stub_hit().
+ // Filled by gen_code_for_exit_from_stub().
+ stub_exit_code: CodePtr,
+
+ // Code for full logic of returning from C method and exiting to the interpreter
+ outline_full_cfunc_return_pos: CodePtr,
+
+ /// For implementing global code invalidation
+ global_inval_patches: Vec<CodepagePatch>,
+
+ /// For implementing global code invalidation. The number of bytes counting from the beginning
+ /// of the inline code block that should not be changed. After patching for global invalidation,
+ /// no one should make changes to the invalidated code region anymore. This is used to
+    /// break out of the invalidation race when there are multiple ractors.
+ inline_frozen_bytes: usize,
+
+ // Methods for generating code for hardcoded (usually C) methods
+ method_codegen_table: HashMap<u64, MethodGenFn>,
+}
+
+/// For implementing global code invalidation. A position in the inline
+/// codeblock to patch into a JMP rel32 which jumps into some code in
+/// the outlined codeblock to exit to the interpreter.
+pub struct CodepagePatch {
+ pub inline_patch_pos: CodePtr,
+ pub outlined_target_pos: CodePtr,
+}
+
+/// Private singleton instance of the codegen globals
+static mut CODEGEN_GLOBALS: Option<CodegenGlobals> = None;
+
+impl CodegenGlobals {
+ /// Initialize the codegen globals
+ pub fn init() {
+        // Executable memory size in bytes (the exec_mem_size option is in MiB)
+ let mem_size = get_option!(exec_mem_size) * 1024 * 1024;
+
+ #[cfg(not(test))]
+ let (mut cb, mut ocb) = {
+ let page_size = unsafe { rb_yjit_get_page_size() }.as_usize();
+ let mem_block: *mut u8 = unsafe { alloc_exec_mem(mem_size.try_into().unwrap()) };
+ let cb = CodeBlock::new(mem_block, mem_size / 2, page_size);
+ let ocb = OutlinedCb::wrap(CodeBlock::new(
+ unsafe { mem_block.add(mem_size / 2) },
+ mem_size / 2,
+ page_size,
+ ));
+ (cb, ocb)
+ };
+
+ // In test mode we're not linking with the C code
+ // so we don't allocate executable memory
+ #[cfg(test)]
+ let mut cb = CodeBlock::new_dummy(mem_size / 2);
+ #[cfg(test)]
+ let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(mem_size / 2));
+
+ let leave_exit_code = gen_leave_exit(&mut ocb);
+
+ let stub_exit_code = gen_code_for_exit_from_stub(&mut ocb);
+
+ // Generate full exit code for C func
+ let cfunc_exit_code = gen_full_cfunc_return(&mut ocb);
+
+ // Mark all code memory as executable
+ cb.mark_all_executable();
+ ocb.unwrap().mark_all_executable();
+
+ let mut codegen_globals = CodegenGlobals {
+ inline_cb: cb,
+ outlined_cb: ocb,
+ leave_exit_code: leave_exit_code,
+ stub_exit_code: stub_exit_code,
+ outline_full_cfunc_return_pos: cfunc_exit_code,
+ global_inval_patches: Vec::new(),
+ inline_frozen_bytes: 0,
+ method_codegen_table: HashMap::new(),
+ };
+
+ // Register the method codegen functions
+ codegen_globals.reg_method_codegen_fns();
+
+ // Initialize the codegen globals instance
+ unsafe {
+ CODEGEN_GLOBALS = Some(codegen_globals);
+ }
+ }
+
+    // Register a specialized codegen function for a particular method. Note that
+    // if the function returns true, the code it generates runs without a
+ // control frame and without interrupt checks. To avoid creating observable
+ // behavior changes, the codegen function should only target simple code paths
+ // that do not allocate and do not make method calls.
+ fn yjit_reg_method(&mut self, klass: VALUE, mid_str: &str, gen_fn: MethodGenFn) {
+ let id_string = std::ffi::CString::new(mid_str).expect("couldn't convert to CString!");
+ let mid = unsafe { rb_intern(id_string.as_ptr()) };
+ let me = unsafe { rb_method_entry_at(klass, mid) };
+
+ if me.is_null() {
+ panic!("undefined optimized method!");
+ }
+
+ // For now, only cfuncs are supported
+ //RUBY_ASSERT(me && me->def);
+ //RUBY_ASSERT(me->def->type == VM_METHOD_TYPE_CFUNC);
+
+ let method_serial = unsafe {
+ let def = (*me).def;
+ get_def_method_serial(def)
+ };
+
+ self.method_codegen_table.insert(method_serial, gen_fn);
+ }
+
+ /// Register codegen functions for some Ruby core methods
+ fn reg_method_codegen_fns(&mut self) {
+ unsafe {
+ // Specialization for C methods. See yjit_reg_method() for details.
+ self.yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not);
+
+ self.yjit_reg_method(rb_cNilClass, "nil?", jit_rb_true);
+ self.yjit_reg_method(rb_mKernel, "nil?", jit_rb_false);
+
+ self.yjit_reg_method(rb_cBasicObject, "==", jit_rb_obj_equal);
+ self.yjit_reg_method(rb_cBasicObject, "equal?", jit_rb_obj_equal);
+ self.yjit_reg_method(rb_mKernel, "eql?", jit_rb_obj_equal);
+ self.yjit_reg_method(rb_cModule, "==", jit_rb_obj_equal);
+ self.yjit_reg_method(rb_cSymbol, "==", jit_rb_obj_equal);
+ self.yjit_reg_method(rb_cSymbol, "===", jit_rb_obj_equal);
+
+ // rb_str_to_s() methods in string.c
+ self.yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s);
+ self.yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s);
+ self.yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize);
+
+ // Thread.current
+ self.yjit_reg_method(
+ rb_singleton_class(rb_cThread),
+ "current",
+ jit_thread_s_current,
+ );
+ }
+ }
+
+ /// Get a mutable reference to the codegen globals instance
+ pub fn get_instance() -> &'static mut CodegenGlobals {
+ unsafe { CODEGEN_GLOBALS.as_mut().unwrap() }
+ }
+
+ /// Get a mutable reference to the inline code block
+ pub fn get_inline_cb() -> &'static mut CodeBlock {
+ &mut CodegenGlobals::get_instance().inline_cb
+ }
+
+ /// Get a mutable reference to the outlined code block
+ pub fn get_outlined_cb() -> &'static mut OutlinedCb {
+ &mut CodegenGlobals::get_instance().outlined_cb
+ }
+
+ pub fn get_leave_exit_code() -> CodePtr {
+ CodegenGlobals::get_instance().leave_exit_code
+ }
+
+ pub fn get_stub_exit_code() -> CodePtr {
+ CodegenGlobals::get_instance().stub_exit_code
+ }
+
+ pub fn push_global_inval_patch(i_pos: CodePtr, o_pos: CodePtr) {
+ let patch = CodepagePatch {
+ inline_patch_pos: i_pos,
+ outlined_target_pos: o_pos,
+ };
+ CodegenGlobals::get_instance()
+ .global_inval_patches
+ .push(patch);
+ }
+
+ // Drain the list of patches and return it
+ pub fn take_global_inval_patches() -> Vec<CodepagePatch> {
+ let globals = CodegenGlobals::get_instance();
+ mem::take(&mut globals.global_inval_patches)
+ }
+
+ pub fn get_inline_frozen_bytes() -> usize {
+ CodegenGlobals::get_instance().inline_frozen_bytes
+ }
+
+ pub fn set_inline_frozen_bytes(frozen_bytes: usize) {
+ CodegenGlobals::get_instance().inline_frozen_bytes = frozen_bytes;
+ }
+
+ pub fn get_outline_full_cfunc_return_pos() -> CodePtr {
+ CodegenGlobals::get_instance().outline_full_cfunc_return_pos
+ }
+
+ pub fn look_up_codegen_method(method_serial: u64) -> Option<MethodGenFn> {
+ let table = &CodegenGlobals::get_instance().method_codegen_table;
+
+ let option_ref = table.get(&method_serial);
+ match option_ref {
+ None => None,
+ Some(&mgf) => Some(mgf), // Deref
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ fn setup_codegen() -> (JITState, Context, CodeBlock, OutlinedCb) {
+ let block = Block::new(BLOCKID_NULL, &Context::default());
+
+ return (
+ JITState::new(&block),
+ Context::new(),
+ CodeBlock::new_dummy(256 * 1024),
+ OutlinedCb::wrap(CodeBlock::new_dummy(256 * 1024)),
+ );
+ }
+
+ #[test]
+ fn test_gen_leave_exit() {
+ let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(256 * 1024));
+ gen_leave_exit(&mut ocb);
+ assert!(ocb.unwrap().get_write_pos() > 0);
+ }
+
+ #[test]
+ fn test_gen_exit() {
+ let (_, ctx, mut cb, _) = setup_codegen();
+ gen_exit(0 as *mut VALUE, &ctx, &mut cb);
+ assert!(cb.get_write_pos() > 0);
+ }
+
+ #[test]
+ fn test_get_side_exit() {
+ let (mut jit, ctx, _, mut ocb) = setup_codegen();
+ get_side_exit(&mut jit, &mut ocb, &ctx);
+ assert!(ocb.unwrap().get_write_pos() > 0);
+ }
+
+ #[test]
+ fn test_gen_check_ints() {
+ let (_, _ctx, mut cb, mut ocb) = setup_codegen();
+ let side_exit = ocb.unwrap().get_write_ptr();
+ gen_check_ints(&mut cb, side_exit);
+ }
+
+ #[test]
+ fn test_gen_nop() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ let status = gen_nop(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ assert_eq!(status, KeepCompiling);
+ assert_eq!(context.diff(&Context::new()), 0);
+ assert_eq!(cb.get_write_pos(), 0);
+ }
+
+ #[test]
+ fn test_gen_pop() {
+ let (mut jit, _, mut cb, mut ocb) = setup_codegen();
+ let mut context = Context::new_with_stack_size(1);
+ let status = gen_pop(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ assert_eq!(status, KeepCompiling);
+ assert_eq!(context.diff(&Context::new()), 0);
+ }
+
+ #[test]
+ fn test_gen_dup() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ context.stack_push(Type::Fixnum);
+ let status = gen_dup(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ assert_eq!(status, KeepCompiling);
+
+ // Did we duplicate the type information for the Fixnum type?
+ assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(0)));
+ assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(1)));
+
+ assert!(cb.get_write_pos() > 0); // Write some movs
+ }
+
+ #[test]
+ fn test_gen_dupn() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ context.stack_push(Type::Fixnum);
+ context.stack_push(Type::Flonum);
+
+ let mut value_array: [u64; 2] = [0, 2]; // We only compile for n == 2
+ let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
+ jit.set_pc(pc);
+
+ let status = gen_dupn(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ assert_eq!(status, KeepCompiling);
+
+ assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(3)));
+ assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(2)));
+ assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(1)));
+ assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0)));
+
+ assert!(cb.get_write_pos() > 0); // Write some movs
+ }
+
+ #[test]
+ fn test_gen_swap() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ context.stack_push(Type::Fixnum);
+ context.stack_push(Type::Flonum);
+
+ let status = gen_swap(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0));
+ let (_, tmp_type_next) = context.get_opnd_mapping(StackOpnd(1));
+
+ assert_eq!(status, KeepCompiling);
+ assert_eq!(tmp_type_top, Type::Fixnum);
+ assert_eq!(tmp_type_next, Type::Flonum);
+ }
+
+ #[test]
+ fn test_putnil() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ let status = gen_putnil(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0));
+
+ assert_eq!(status, KeepCompiling);
+ assert_eq!(tmp_type_top, Type::Nil);
+ assert!(cb.get_write_pos() > 0);
+ }
+
+ #[test]
+ fn test_putobject_qtrue() {
+ // Test gen_putobject with Qtrue
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+
+ let mut value_array: [u64; 2] = [0, Qtrue.into()];
+ let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
+ jit.set_pc(pc);
+
+ let status = gen_putobject(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0));
+
+ assert_eq!(status, KeepCompiling);
+ assert_eq!(tmp_type_top, Type::True);
+ assert!(cb.get_write_pos() > 0);
+ }
+
+ #[test]
+ fn test_putobject_fixnum() {
+ // Test gen_putobject with a Fixnum to test another conditional branch
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+
+ // The Fixnum 7 is encoded as 7 * 2 + 1, or 15
+ let mut value_array: [u64; 2] = [0, 15];
+ let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
+ jit.set_pc(pc);
+
+ let status = gen_putobject(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0));
+
+ assert_eq!(status, KeepCompiling);
+ assert_eq!(tmp_type_top, Type::Fixnum);
+ assert!(cb.get_write_pos() > 0);
+ }
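
The magic number 15 above comes from CRuby's Fixnum tagging: the integer is shifted left by one and the low bit is set. As a one-line helper (ours, for illustration only):

```rust
// Fixnum tagging as used by the test above: 7 becomes (7 << 1) | 1 == 15.
fn int_to_fixnum_bits(n: i64) -> u64 {
    ((n << 1) | 1) as u64
}
```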
+
+ #[test]
+ fn test_int2fix() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ jit.opcode = OP_PUTOBJECT_INT2FIX_0_;
+ let status = gen_putobject_int2fix(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0));
+
+ // Right now we're not testing the generated machine code to make sure a literal 1 or 0 was pushed. I've checked locally.
+ assert_eq!(status, KeepCompiling);
+ assert_eq!(tmp_type_top, Type::Fixnum);
+ }
+
+ #[test]
+ fn test_putself() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ let status = gen_putself(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ assert_eq!(status, KeepCompiling);
+ assert!(cb.get_write_pos() > 0);
+ }
+
+ #[test]
+ fn test_gen_setn() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ context.stack_push(Type::Fixnum);
+ context.stack_push(Type::Flonum);
+ context.stack_push(Type::String);
+
+ let mut value_array: [u64; 2] = [0, 2];
+ let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
+ jit.set_pc(pc);
+
+ let status = gen_setn(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ assert_eq!(status, KeepCompiling);
+
+ assert_eq!(Type::String, context.get_opnd_type(StackOpnd(2)));
+ assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(1)));
+ assert_eq!(Type::String, context.get_opnd_type(StackOpnd(0)));
+
+ assert!(cb.get_write_pos() > 0);
+ }
+
+ #[test]
+ fn test_gen_topn() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ context.stack_push(Type::Flonum);
+ context.stack_push(Type::String);
+
+ let mut value_array: [u64; 2] = [0, 1];
+ let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
+ jit.set_pc(pc);
+
+ let status = gen_topn(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ assert_eq!(status, KeepCompiling);
+
+ assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(2)));
+ assert_eq!(Type::String, context.get_opnd_type(StackOpnd(1)));
+ assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0)));
+
+ assert!(cb.get_write_pos() > 0); // Write some movs
+ }
+
+ #[test]
+ fn test_gen_adjuststack() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ context.stack_push(Type::Flonum);
+ context.stack_push(Type::String);
+ context.stack_push(Type::Fixnum);
+
+ let mut value_array: [u64; 3] = [0, 2, 0];
+ let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
+ jit.set_pc(pc);
+
+ let status = gen_adjuststack(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ assert_eq!(status, KeepCompiling);
+
+ assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0)));
+
+ assert!(cb.get_write_pos() == 0); // No instructions written
+ }
+
+ #[test]
+ fn test_gen_leave() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ // Push return value
+ context.stack_push(Type::Fixnum);
+ gen_leave(&mut jit, &mut context, &mut cb, &mut ocb);
+ }
+}
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
new file mode 100644
index 0000000000..5ea3ee8193
--- /dev/null
+++ b/yjit/src/core.rs
@@ -0,0 +1,2071 @@
+use crate::asm::x86_64::*;
+use crate::asm::*;
+use crate::codegen::*;
+use crate::cruby::*;
+use crate::options::*;
+use crate::stats::*;
+use crate::utils::*;
+use core::ffi::c_void;
+use std::cell::*;
+use std::hash::{Hash, Hasher};
+use std::mem;
+use std::mem::size_of;
+use std::ptr;
+use std::rc::{Rc, Weak};
+use InsnOpnd::*;
+use TempMapping::*;
+
+// Maximum number of temp value types we keep track of
+pub const MAX_TEMP_TYPES: usize = 8;
+
+// Maximum number of local variable types we keep track of
+const MAX_LOCAL_TYPES: usize = 8;
+
+// Represent the type of a value (local/stack/self) in YJIT
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum Type {
+ Unknown,
+ UnknownImm,
+ UnknownHeap,
+ Nil,
+ True,
+ False,
+ Fixnum,
+ Flonum,
+ Array,
+ Hash,
+ ImmSymbol,
+ HeapSymbol,
+ String,
+}
+
+// Default initialization
+impl Default for Type {
+ fn default() -> Self {
+ Type::Unknown
+ }
+}
+
+impl Type {
+ /// This returns an appropriate Type based on a known value
+ pub fn from(val: VALUE) -> Type {
+ if val.special_const_p() {
+ if val.fixnum_p() {
+ Type::Fixnum
+ } else if val.nil_p() {
+ Type::Nil
+ } else if val == Qtrue {
+ Type::True
+ } else if val == Qfalse {
+ Type::False
+ } else if val.static_sym_p() {
+ Type::ImmSymbol
+ } else if val.flonum_p() {
+ Type::Flonum
+ } else {
+ unreachable!()
+ }
+ } else {
+ match val.builtin_type() {
+ RUBY_T_ARRAY => Type::Array,
+ RUBY_T_HASH => Type::Hash,
+ RUBY_T_STRING => Type::String,
+ _ => Type::UnknownHeap,
+ }
+ }
+ }
+
+ /// Check if the type is an immediate
+ pub fn is_imm(&self) -> bool {
+ match self {
+ Type::UnknownImm => true,
+ Type::Nil => true,
+ Type::True => true,
+ Type::False => true,
+ Type::Fixnum => true,
+ Type::Flonum => true,
+ Type::ImmSymbol => true,
+ _ => false,
+ }
+ }
+
+ /// Returns true when the type is not specific.
+ pub fn is_unknown(&self) -> bool {
+ match self {
+ Type::Unknown | Type::UnknownImm | Type::UnknownHeap => true,
+ _ => false,
+ }
+ }
+
+ /// Returns true when we know the VALUE is a specific handle type,
+ /// such as a static symbol ([Type::ImmSymbol], i.e. true from RB_STATIC_SYM_P()).
+ /// Opposite of [Self::is_unknown].
+ pub fn is_specific(&self) -> bool {
+ !self.is_unknown()
+ }
+
+ /// Check if the type is a heap object
+ pub fn is_heap(&self) -> bool {
+ match self {
+ Type::UnknownHeap => true,
+ Type::Array => true,
+ Type::Hash => true,
+ Type::HeapSymbol => true,
+ Type::String => true,
+ _ => false,
+ }
+ }
+
+ /// Compute a difference between two value types
+ /// Returns 0 if the two are the same
+ /// Returns > 0 if different but compatible
+ /// Returns usize::MAX if incompatible
+ pub fn diff(self, dst: Self) -> usize {
+ // Perfect match, difference is zero
+ if self == dst {
+ return 0;
+ }
+
+ // Any type can flow into an unknown type
+ if dst == Type::Unknown {
+ return 1;
+ }
+
+ // Specific heap type into unknown heap type is imperfect but valid
+ if self.is_heap() && dst == Type::UnknownHeap {
+ return 1;
+ }
+
+ // Specific immediate type into unknown immediate type is imperfect but valid
+ if self.is_imm() && dst == Type::UnknownImm {
+ return 1;
+ }
+
+ // Incompatible types
+ return usize::MAX;
+ }
+
+ /// Upgrade this type into a more specific compatible type
+ /// The new type must be compatible and at least as specific as the previously known type.
+ fn upgrade(&mut self, src: Self) {
+ // Here we're checking that src is more specific than self
+ assert!(src.diff(*self) != usize::MAX);
+ *self = src;
+ }
+}
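
Given the rules above, the lattice behaves as follows. These examples follow directly from `diff()` and assume the `Type` definitions in this file are in scope (written as a test-style sketch):

```rust
#[test]
fn type_diff_examples() {
    assert_eq!(Type::Fixnum.diff(Type::Fixnum), 0);          // exact match
    assert_eq!(Type::Fixnum.diff(Type::Unknown), 1);         // anything flows into Unknown
    assert_eq!(Type::Fixnum.diff(Type::UnknownImm), 1);      // immediates flow into UnknownImm
    assert_eq!(Type::Array.diff(Type::UnknownHeap), 1);      // heap types flow into UnknownHeap
    assert_eq!(Type::Fixnum.diff(Type::Flonum), usize::MAX); // incompatible
}
```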
+
+// Potential mapping of a value on the temporary stack to
+// self, a local variable or constant so that we can track its type
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+pub enum TempMapping {
+ MapToStack, // Normal stack value
+ MapToSelf, // Temp maps to the self operand
+ MapToLocal(u8), // Temp maps to a local variable with index
+ //ConstMapping, // Small constant (0, 1, 2, Qnil, Qfalse, Qtrue)
+}
+
+impl Default for TempMapping {
+ fn default() -> Self {
+ MapToStack
+ }
+}
+
+// Operand to a bytecode instruction
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum InsnOpnd {
+ // The value is self
+ SelfOpnd,
+
+ // Temporary stack operand with stack index
+ StackOpnd(u16),
+}
+
+/// Code generation context
+/// Contains information we can use to specialize/optimize code
+/// There are a lot of context objects so we try to keep the size small.
+#[derive(Copy, Clone, Default, Debug)]
+pub struct Context {
+ // Number of values currently on the temporary stack
+ stack_size: u16,
+
+ // Offset of the JIT SP relative to the interpreter SP
+ // This represents how far the JIT's SP is from the "real" SP
+ sp_offset: i16,
+
+ // Depth of this block in the sidechain (eg: inline-cache chain)
+ chain_depth: u8,
+
+ // Local variable types we keep track of
+ local_types: [Type; MAX_LOCAL_TYPES],
+
+ // Temporary variable types we keep track of
+ temp_types: [Type; MAX_TEMP_TYPES],
+
+ // Type we track for self
+ self_type: Type,
+
+ // Mapping of temp stack entries to types we track
+ temp_mapping: [TempMapping; MAX_TEMP_TYPES],
+}
+
+/// Tuple of (iseq, idx) used to identify basic blocks
+/// There are a lot of blockid objects so we try to keep the size small.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub struct BlockId {
+ /// Instruction sequence
+ pub iseq: IseqPtr,
+
+ /// Index in the iseq where the block starts
+ pub idx: u32,
+}
+
+/// Null block id constant
+pub const BLOCKID_NULL: BlockId = BlockId {
+ iseq: ptr::null(),
+ idx: 0,
+};
+
+/// Branch code shape enumeration
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum BranchShape {
+ Next0, // Target 0 is next
+ Next1, // Target 1 is next
+ Default, // Neither target is next
+}
+
+// Branch code generation function signature
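+// (see gen_jump_branch() later in this file for a minimal example of this signature)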
+type BranchGenFn =
+ fn(cb: &mut CodeBlock, target0: CodePtr, target1: Option<CodePtr>, shape: BranchShape) -> ();
+
+/// Store info about an outgoing branch in a code segment
+/// Note: care must be taken to minimize the size of branch objects
+struct Branch {
+ // Block this is attached to
+ block: BlockRef,
+
+ // Positions where the generated code starts and ends
+ start_addr: Option<CodePtr>,
+ end_addr: Option<CodePtr>,
+
+ // Context right after the branch instruction
+ src_ctx: Context,
+
+ // Branch target blocks and their contexts
+ targets: [BlockId; 2],
+ target_ctxs: [Context; 2],
+ blocks: [Option<BlockRef>; 2],
+
+ // Jump target addresses
+ dst_addrs: [Option<CodePtr>; 2],
+
+ // Branch code generation function
+ gen_fn: BranchGenFn,
+
+ // Shape of the branch
+ shape: BranchShape,
+}
+
+impl std::fmt::Debug for Branch {
+ fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ // TODO: expand this if needed. #[derive(Debug)] on Branch gave a
+ // strange error related to BranchGenFn
+ formatter
+ .debug_struct("Branch")
+ .field("start", &self.start_addr)
+ .field("end", &self.end_addr)
+ .field("targets", &self.targets)
+ .finish()
+ }
+}
+
+impl Branch {
+ // Compute the size of the branch code
+ fn code_size(&self) -> usize {
+ (self.end_addr.unwrap().raw_ptr() as usize) - (self.start_addr.unwrap().raw_ptr() as usize)
+ }
+}
+
+// In case this block is invalidated, these two pieces of info
+// help to remove all pointers to this block in the system.
+#[derive(Debug)]
+pub struct CmeDependency {
+ pub receiver_klass: VALUE,
+ pub callee_cme: *const rb_callable_method_entry_t,
+}
+
+/// Basic block version
+/// Represents a portion of an iseq compiled with a given context
+/// Note: care must be taken to minimize the size of block_t objects
+#[derive(Debug)]
+pub struct Block {
+ // Bytecode sequence (iseq, idx) this is a version of
+ blockid: BlockId,
+
+ // Index one past the last instruction for this block in the iseq
+ end_idx: u32,
+
+ // Context at the start of the block
+ // This should never be mutated
+ ctx: Context,
+
+ // Positions where the generated code starts and ends
+ start_addr: Option<CodePtr>,
+ end_addr: Option<CodePtr>,
+
+ // List of incoming branches (from predecessors)
+ // These are reference counted (ownership shared between predecessor and successors)
+ incoming: Vec<BranchRef>,
+
+    // NOTE: we might actually be able to store the branches here without refcounting.
+    // However, using a RefCell makes it easy to get a pointer to Branch objects.
+ //
+ // List of outgoing branches (to successors)
+ outgoing: Vec<BranchRef>,
+
+ // FIXME: should these be code pointers instead?
+ // Offsets for GC managed objects in the mainline code block
+ gc_object_offsets: Vec<u32>,
+
+ // CME dependencies of this block, to help to remove all pointers to this
+ // block in the system.
+ cme_dependencies: Vec<CmeDependency>,
+
+ // Code address of an exit for `ctx` and `blockid`.
+ // Used for block invalidation.
+ pub entry_exit: Option<CodePtr>,
+}
+
+/// Reference-counted pointer to a block that can be borrowed mutably.
+/// Wrapped so we could implement [Hash] and [Eq] for use with stdlib collections.
+#[derive(Debug)]
+pub struct BlockRef(Rc<RefCell<Block>>);
+
+/// Reference-counted pointer to a branch that can be borrowed mutably
+type BranchRef = Rc<RefCell<Branch>>;
+
+/// List of block versions for a given blockid
+type VersionList = Vec<BlockRef>;
+
+/// Map from iseq indices to lists of versions for that given blockid
+/// An instance of this is stored on each iseq
+type VersionMap = Vec<VersionList>;
+
+impl BlockRef {
+ /// Constructor
+ pub fn new(rc: Rc<RefCell<Block>>) -> Self {
+ Self(rc)
+ }
+
+ /// Borrow the block through [RefCell].
+ pub fn borrow(&self) -> Ref<'_, Block> {
+ self.0.borrow()
+ }
+
+ /// Borrow the block for mutation through [RefCell].
+ pub fn borrow_mut(&self) -> RefMut<'_, Block> {
+ self.0.borrow_mut()
+ }
+}
+
+impl Clone for BlockRef {
+ /// Clone the [Rc]
+ fn clone(&self) -> Self {
+ Self(self.0.clone())
+ }
+}
+
+impl Hash for BlockRef {
+ /// Hash the reference by hashing the pointer
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ let rc_ptr = Rc::as_ptr(&self.0);
+ rc_ptr.hash(state);
+ }
+}
+
+impl PartialEq for BlockRef {
+ /// Equality defined by allocation identity
+ fn eq(&self, other: &Self) -> bool {
+ Rc::ptr_eq(&self.0, &other.0)
+ }
+}
+
+/// It's comparison by identity, so all the requirements are satisfied.
+impl Eq for BlockRef {}
+
+/// This is all the data YJIT stores on an iseq
+/// This is lazily allocated on the Rust side (see [get_iseq_payload]) and a
+/// pointer to it is stored on the iseq. C code passes that pointer back to us
+/// when calling into YJIT (e.g. the GC callbacks below).
+#[derive(Default)]
+pub struct IseqPayload {
+ version_map: VersionMap,
+}
+
+impl IseqPayload {
+ /// Remove all block versions from the payload and then return them as an iterator
+ pub fn take_all_blocks(&mut self) -> impl Iterator<Item = BlockRef> {
+ // Empty the blocks
+ let version_map = mem::take(&mut self.version_map);
+
+ // Turn it into an iterator that owns the blocks and return
+ version_map.into_iter().flat_map(|versions| versions)
+ }
+}
+
+/// Get the payload for an iseq. For safety it's up to the caller to ensure the returned `&mut`
+/// upholds aliasing rules and that the argument is a valid iseq.
+pub unsafe fn load_iseq_payload(iseq: IseqPtr) -> Option<&'static mut IseqPayload> {
+ let payload = rb_iseq_get_yjit_payload(iseq);
+ let payload: *mut IseqPayload = payload.cast();
+ payload.as_mut()
+}
+
+/// Get the payload object associated with an iseq. Create one if none exists.
+fn get_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload {
+ use core::ffi::c_void;
+ type VoidPtr = *mut c_void;
+
+ let payload_non_null = unsafe {
+ let payload = rb_iseq_get_yjit_payload(iseq);
+ if payload.is_null() {
+ // Increment the compiled iseq count
+ incr_counter!(compiled_iseq_count);
+
+ // Allocate a new payload with Box and transfer ownership to the GC.
+ // We drop the payload with Box::from_raw when the GC frees the iseq and calls us.
+ // NOTE(alan): Sometimes we read from an iseq without ever writing to it.
+ // We allocate in those cases anyways.
+ let new_payload = Box::into_raw(Box::new(IseqPayload::default()));
+ rb_iseq_set_yjit_payload(iseq, new_payload as VoidPtr);
+
+ new_payload
+ } else {
+ payload as *mut IseqPayload
+ }
+ };
+
+ // SAFETY: we should have the VM lock and all other Ruby threads should be asleep. So we have
+ // exclusive mutable access.
+ // Hmm, nothing seems to stop calling this on the same
+ // iseq twice, though, which violates aliasing rules.
+ unsafe { payload_non_null.as_mut() }.unwrap()
+}
+
+/// Free the per-iseq payload
+#[no_mangle]
+pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) {
+ let payload = {
+ if payload.is_null() {
+ // Nothing to free.
+ return;
+ } else {
+ payload as *mut IseqPayload
+ }
+ };
+
+ use crate::invariants;
+
+ // Take ownership of the payload with Box::from_raw().
+ // It drops right before this function returns.
+ // SAFETY: We got the pointer from Box::into_raw().
+ let payload = unsafe { Box::from_raw(payload) };
+
+ // Remove all blocks in the payload from global invariants table.
+ for versions in &payload.version_map {
+ for block in versions {
+ invariants::block_assumptions_free(&block);
+ }
+ }
+}
+
+/// GC callback for marking GC objects in the per-iseq payload.
+#[no_mangle]
+pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) {
+ let payload = if payload.is_null() {
+ // Nothing to mark.
+ return;
+ } else {
+ // SAFETY: It looks like the GC takes the VM lock while marking
+ // so we should be satisfying aliasing rules here.
+ unsafe { &*(payload as *const IseqPayload) }
+ };
+
+ // For marking VALUEs written into the inline code block.
+ // We don't write VALUEs in the outlined block.
+ let cb: &CodeBlock = CodegenGlobals::get_inline_cb();
+
+ for versions in &payload.version_map {
+ for block in versions {
+ let block = block.borrow();
+
+ unsafe { rb_gc_mark_movable(block.blockid.iseq.into()) };
+
+ // Mark method entry dependencies
+ for cme_dep in &block.cme_dependencies {
+ unsafe { rb_gc_mark_movable(cme_dep.receiver_klass) };
+ unsafe { rb_gc_mark_movable(cme_dep.callee_cme.into()) };
+ }
+
+ // Mark outgoing branch entries
+ for branch in &block.outgoing {
+ let branch = branch.borrow();
+ for target in &branch.targets {
+ unsafe { rb_gc_mark_movable(target.iseq.into()) };
+ }
+ }
+
+ // Walk over references to objects in generated code.
+ for offset in &block.gc_object_offsets {
+ let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr();
+ // Creating an unaligned pointer is well defined unlike in C.
+ let value_address = value_address as *const VALUE;
+
+ // SAFETY: these point to YJIT's code buffer
+ unsafe {
+ let object = value_address.read_unaligned();
+ rb_gc_mark_movable(object);
+ };
+ }
+ }
+ }
+}
+
+/// GC callback for updating GC objects in the per-iseq payload.
+/// This is a mirror of [rb_yjit_iseq_mark].
+#[no_mangle]
+pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
+ let payload = if payload.is_null() {
+ // Nothing to update.
+ return;
+ } else {
+ // SAFETY: It looks like the GC takes the VM lock while updating references
+ // so we should be satisfying aliasing rules here.
+ unsafe { &*(payload as *const IseqPayload) }
+ };
+
+ // Evict other threads from generated code since we are about to patch them.
+ // Also acts as an assert that we hold the VM lock.
+ unsafe { rb_vm_barrier() };
+
+ // For updating VALUEs written into the inline code block.
+ let cb = CodegenGlobals::get_inline_cb();
+
+ for versions in &payload.version_map {
+ for block in versions {
+ let mut block = block.borrow_mut();
+
+ block.blockid.iseq = unsafe { rb_gc_location(block.blockid.iseq.into()) }.as_iseq();
+
+ // Update method entry dependencies
+ for cme_dep in &mut block.cme_dependencies {
+ cme_dep.receiver_klass = unsafe { rb_gc_location(cme_dep.receiver_klass) };
+ cme_dep.callee_cme = unsafe { rb_gc_location(cme_dep.callee_cme.into()) }.as_cme();
+ }
+
+ // Update outgoing branch entries
+ for branch in &block.outgoing {
+ let mut branch = branch.borrow_mut();
+ for target in &mut branch.targets {
+ target.iseq = unsafe { rb_gc_location(target.iseq.into()) }.as_iseq();
+ }
+ }
+
+ // Walk over references to objects in generated code.
+ for offset in &block.gc_object_offsets {
+ let offset_to_value = offset.as_usize();
+ let value_address: *const u8 = cb.get_ptr(offset_to_value).raw_ptr();
+ // Creating an unaligned pointer is well defined unlike in C.
+ let value_address = value_address as *mut VALUE;
+
+ // SAFETY: these point to YJIT's code buffer
+ let object = unsafe { value_address.read_unaligned() };
+ let new_addr = unsafe { rb_gc_location(object) };
+
+ // Only write when the VALUE moves, to be CoW friendly.
+ if new_addr != object {
+ // Possibly unlock the page we need to update
+ cb.mark_position_writable(offset_to_value);
+
+ // Object could cross a page boundary, so unlock there as well
+ cb.mark_position_writable(offset_to_value + size_of::<VALUE>() - 1);
+
+ // SAFETY: we just made this address writable
+ unsafe { value_address.write_unaligned(new_addr) };
+ }
+ }
+ }
+ }
+
+ // Note that we would have returned already if YJIT is off.
+ cb.mark_all_executable();
+
+ // I guess we need to make the outlined block executable as well because
+ // we don't split the two at exact page boundaries.
+ CodegenGlobals::get_outlined_cb()
+ .unwrap()
+ .mark_all_executable();
+}
+
+/// Get all blocks for a particular place in an iseq.
+fn get_version_list(blockid: BlockId) -> &'static mut VersionList {
+ let payload = get_iseq_payload(blockid.iseq);
+ let insn_idx = blockid.idx.as_usize();
+
+ // Expand the version map as necessary
+ if insn_idx >= payload.version_map.len() {
+ payload
+ .version_map
+ .resize(insn_idx + 1, VersionList::default());
+ }
+
+ return payload.version_map.get_mut(insn_idx).unwrap();
+}
+
+/// Take all of the blocks for a particular place in an iseq
+pub fn take_version_list(blockid: BlockId) -> VersionList {
+ let payload = get_iseq_payload(blockid.iseq);
+ let insn_idx = blockid.idx.as_usize();
+
+ if insn_idx >= payload.version_map.len() {
+ VersionList::default()
+ } else {
+ mem::take(&mut payload.version_map[insn_idx])
+ }
+}
+
+/// Count the number of block versions matching a given blockid
+fn get_num_versions(blockid: BlockId) -> usize {
+ let insn_idx = blockid.idx.as_usize();
+ let payload = get_iseq_payload(blockid.iseq);
+
+ payload
+ .version_map
+ .get(insn_idx)
+ .map(|versions| versions.len())
+ .unwrap_or(0)
+}
+
+/// Get a list of block versions generated for an iseq
+/// This is used for disassembly (see disasm.rs)
+pub fn get_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> {
+ let payload = get_iseq_payload(iseq);
+
+ let mut blocks = Vec::<BlockRef>::new();
+
+ // For each instruction index
+ for insn_idx in 0..payload.version_map.len() {
+ let version_list = &payload.version_map[insn_idx];
+
+ // For each version at this instruction index
+ for version in version_list {
+ // Clone the block ref and add it to the list
+ blocks.push(version.clone());
+ }
+ }
+
+ return blocks;
+}
+
+/// Retrieve a basic block version for an (iseq, idx) tuple
+/// This will return None if no version is found
+fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
+ let versions = get_version_list(blockid);
+
+ // Best match found
+ let mut best_version: Option<BlockRef> = None;
+ let mut best_diff = usize::MAX;
+
+ // For each version matching the blockid
+ for blockref in versions.iter_mut() {
+ let block = blockref.borrow();
+ let diff = ctx.diff(&block.ctx);
+
+ // Note that we always prefer the first matching
+ // version found because of inline-cache chains
+ if diff < best_diff {
+ best_version = Some(blockref.clone());
+ best_diff = diff;
+ }
+ }
+
+ // If greedy versioning is enabled
+ if get_option!(greedy_versioning) {
+ // If we're below the version limit, don't settle for an imperfect match
+ if versions.len() + 1 < get_option!(max_versions) && best_diff > 0 {
+ return None;
+ }
+ }
+
+ return best_version;
+}
+
+/// Produce a generic context when the block version limit is hit for a blockid
+pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
+ // Guard chains implement limits separately, do nothing
+ if ctx.chain_depth > 0 {
+ return *ctx;
+ }
+
+ // If this block version we're about to add will hit the version limit
+ if get_num_versions(blockid) + 1 >= get_option!(max_versions) {
+ // Produce a generic context that stores no type information,
+ // but still respects the stack_size and sp_offset constraints.
+ // This new context will then match all future requests.
+ let mut generic_ctx = Context::default();
+ generic_ctx.stack_size = ctx.stack_size;
+ generic_ctx.sp_offset = ctx.sp_offset;
+
+        // Return this generic context in place of the incoming one
+ return generic_ctx;
+ }
+
+ return *ctx;
+}
+
+/// Keep track of a block version. Block should be fully constructed.
+/// Uses `cb` for running write barriers.
+fn add_block_version(blockref: &BlockRef, cb: &CodeBlock) {
+ let block = blockref.borrow();
+
+ // Function entry blocks must have stack size 0
+ assert!(!(block.blockid.idx == 0 && block.ctx.stack_size > 0));
+
+ let version_list = get_version_list(block.blockid);
+
+ version_list.push(blockref.clone());
+
+ // By writing the new block to the iseq, the iseq now
+ // contains new references to Ruby objects. Run write barriers.
+ let iseq: VALUE = block.blockid.iseq.into();
+ for dep in block.iter_cme_deps() {
+ obj_written!(iseq, dep.receiver_klass);
+ obj_written!(iseq, dep.callee_cme.into());
+ }
+
+ // Run write barriers for all objects in generated code.
+ for offset in &block.gc_object_offsets {
+ let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr();
+ // Creating an unaligned pointer is well defined unlike in C.
+ let value_address: *const VALUE = value_address.cast();
+
+ let object = unsafe { value_address.read_unaligned() };
+ obj_written!(iseq, object);
+ }
+
+ incr_counter!(compiled_block_count);
+}
+
+/// Remove a block version from the version map of its parent ISEQ
+fn remove_block_version(blockref: &BlockRef) {
+ let block = blockref.borrow();
+ let version_list = get_version_list(block.blockid);
+
+ // Retain the versions that are not this one
+ version_list.retain(|other| blockref != other);
+}
+
+//===========================================================================
+// I put the implementation of traits for core.rs types below
+// We can move these closer to the above structs later if we want.
+//===========================================================================
+
+impl Block {
+ pub fn new(blockid: BlockId, ctx: &Context) -> BlockRef {
+ let block = Block {
+ blockid,
+ end_idx: 0,
+ ctx: *ctx,
+ start_addr: None,
+ end_addr: None,
+ incoming: Vec::new(),
+ outgoing: Vec::new(),
+ gc_object_offsets: Vec::new(),
+ cme_dependencies: Vec::new(),
+ entry_exit: None,
+ };
+
+ // Wrap the block in a reference counted refcell
+ // so that the block ownership can be shared
+ BlockRef::new(Rc::new(RefCell::new(block)))
+ }
+
+ pub fn get_blockid(&self) -> BlockId {
+ self.blockid
+ }
+
+ pub fn get_end_idx(&self) -> u32 {
+ self.end_idx
+ }
+
+ pub fn get_ctx(&self) -> Context {
+ self.ctx
+ }
+
+ pub fn get_start_addr(&self) -> Option<CodePtr> {
+ self.start_addr
+ }
+
+ pub fn get_end_addr(&self) -> Option<CodePtr> {
+ self.end_addr
+ }
+
+ /// Get an immutable iterator over cme dependencies
+ pub fn iter_cme_deps(&self) -> std::slice::Iter<'_, CmeDependency> {
+ self.cme_dependencies.iter()
+ }
+
+ /// Set the starting address in the generated code for the block
+ /// This can be done only once for a block
+ pub fn set_start_addr(&mut self, addr: CodePtr) {
+ assert!(self.start_addr.is_none());
+ self.start_addr = Some(addr);
+ }
+
+    /// Set the end address in the generated code for the block
+ /// This can be done only once for a block
+ pub fn set_end_addr(&mut self, addr: CodePtr) {
+ // The end address can only be set after the start address is set
+ assert!(self.start_addr.is_some());
+
+ // TODO: assert constraint that blocks can shrink but not grow in length
+ self.end_addr = Some(addr);
+ }
+
+    /// Set the index one past the last instruction in the block
+ /// This can be done only once for a block
+ pub fn set_end_idx(&mut self, end_idx: u32) {
+ assert!(self.end_idx == 0);
+ self.end_idx = end_idx;
+ }
+
+ pub fn add_gc_object_offset(self: &mut Block, ptr_offset: u32) {
+ self.gc_object_offsets.push(ptr_offset);
+ }
+
+ /// Instantiate a new CmeDependency struct and add it to the list of
+ /// dependencies for this block.
+ pub fn add_cme_dependency(
+ &mut self,
+ receiver_klass: VALUE,
+ callee_cme: *const rb_callable_method_entry_t,
+ ) {
+ self.cme_dependencies.push(CmeDependency {
+ receiver_klass,
+ callee_cme,
+ });
+ }
+
+ // Compute the size of the block code
+ pub fn code_size(&self) -> usize {
+ (self.end_addr.unwrap().raw_ptr() as usize) - (self.start_addr.unwrap().raw_ptr() as usize)
+ }
+}
+
+impl Context {
+ pub fn new_with_stack_size(size: i16) -> Self {
+ return Context {
+ stack_size: size as u16,
+ sp_offset: size,
+ chain_depth: 0,
+ local_types: [Type::Unknown; MAX_LOCAL_TYPES],
+ temp_types: [Type::Unknown; MAX_TEMP_TYPES],
+ self_type: Type::Unknown,
+ temp_mapping: [MapToStack; MAX_TEMP_TYPES],
+ };
+ }
+
+ pub fn new() -> Self {
+ return Self::new_with_stack_size(0);
+ }
+
+ pub fn get_stack_size(&self) -> u16 {
+ self.stack_size
+ }
+
+ pub fn get_sp_offset(&self) -> i16 {
+ self.sp_offset
+ }
+
+ pub fn set_sp_offset(&mut self, offset: i16) {
+ self.sp_offset = offset;
+ }
+
+ pub fn get_chain_depth(&self) -> u8 {
+ self.chain_depth
+ }
+
+ pub fn reset_chain_depth(&mut self) {
+ self.chain_depth = 0;
+ }
+
+ pub fn increment_chain_depth(&mut self) {
+ self.chain_depth += 1;
+ }
+
+ /// Get an operand for the adjusted stack pointer address
+ pub fn sp_opnd(&self, offset_bytes: isize) -> X86Opnd {
+ let offset = ((self.sp_offset as isize) * (SIZEOF_VALUE as isize)) + offset_bytes;
+ let offset = offset as i32;
+ return mem_opnd(64, REG_SP, offset);
+ }
+
+ /// Push one new value on the temp stack with an explicit mapping
+ /// Return a pointer to the new stack top
+ pub fn stack_push_mapping(&mut self, (mapping, temp_type): (TempMapping, Type)) -> X86Opnd {
+ // If type propagation is disabled, store no types
+ if get_option!(no_type_prop) {
+ return self.stack_push_mapping((mapping, Type::Unknown));
+ }
+
+ let stack_size: usize = self.stack_size.into();
+
+ // Keep track of the type and mapping of the value
+ if stack_size < MAX_TEMP_TYPES {
+ self.temp_mapping[stack_size] = mapping;
+ self.temp_types[stack_size] = temp_type;
+
+ if let MapToLocal(idx) = mapping {
+ assert!((idx as usize) < MAX_LOCAL_TYPES);
+ }
+ }
+
+ self.stack_size += 1;
+ self.sp_offset += 1;
+
+ // SP points just above the topmost value
+ let offset = ((self.sp_offset as i32) - 1) * (SIZEOF_VALUE as i32);
+ return mem_opnd(64, REG_SP, offset);
+ }
+
+ /// Push one new value on the temp stack
+ /// Return a pointer to the new stack top
+ pub fn stack_push(&mut self, val_type: Type) -> X86Opnd {
+ return self.stack_push_mapping((MapToStack, val_type));
+ }
+
+ /// Push the self value on the stack
+ pub fn stack_push_self(&mut self) -> X86Opnd {
+ return self.stack_push_mapping((MapToSelf, Type::Unknown));
+ }
+
+ /// Push a local variable on the stack
+ pub fn stack_push_local(&mut self, local_idx: usize) -> X86Opnd {
+ if local_idx >= MAX_LOCAL_TYPES {
+ return self.stack_push(Type::Unknown);
+ }
+
+ return self.stack_push_mapping((MapToLocal(local_idx as u8), Type::Unknown));
+ }
+
+ // Pop N values off the stack
+ // Return a pointer to the stack top before the pop operation
+ pub fn stack_pop(&mut self, n: usize) -> X86Opnd {
+ assert!(n <= self.stack_size.into());
+
+ // SP points just above the topmost value
+ let offset = ((self.sp_offset as i32) - 1) * (SIZEOF_VALUE as i32);
+ let top = mem_opnd(64, REG_SP, offset);
+
+ // Clear the types of the popped values
+ for i in 0..n {
+ let idx: usize = (self.stack_size as usize) - i - 1;
+
+ if idx < MAX_TEMP_TYPES {
+ self.temp_types[idx] = Type::Unknown;
+ self.temp_mapping[idx] = MapToStack;
+ }
+ }
+
+ self.stack_size -= n as u16;
+ self.sp_offset -= n as i16;
+
+ return top;
+ }
+
+ /// Get an operand pointing to a slot on the temp stack
+ pub fn stack_opnd(&self, idx: i32) -> X86Opnd {
+ // SP points just above the topmost value
+ let offset = ((self.sp_offset as i32) - 1 - idx) * (SIZEOF_VALUE as i32);
+ let opnd = mem_opnd(64, REG_SP, offset);
+ return opnd;
+ }
+
+ /// Get the type of an instruction operand
+ pub fn get_opnd_type(&self, opnd: InsnOpnd) -> Type {
+ match opnd {
+ SelfOpnd => self.self_type,
+ StackOpnd(idx) => {
+ let idx = idx as u16;
+ assert!(idx < self.stack_size);
+ let stack_idx: usize = (self.stack_size - 1 - idx).into();
+
+ // If outside of tracked range, do nothing
+ if stack_idx >= MAX_TEMP_TYPES {
+ return Type::Unknown;
+ }
+
+ let mapping = self.temp_mapping[stack_idx];
+
+ match mapping {
+ MapToSelf => self.self_type,
+ MapToStack => self.temp_types[(self.stack_size - 1 - idx) as usize],
+ MapToLocal(idx) => {
+ assert!((idx as usize) < MAX_LOCAL_TYPES);
+ return self.local_types[idx as usize];
+ }
+ }
+ }
+ }
+ }
+
+ /// Get the currently tracked type for a local variable
+ pub fn get_local_type(&self, idx: usize) -> Type {
+        if idx >= MAX_LOCAL_TYPES {
+ return Type::Unknown;
+ }
+
+ return self.local_types[idx];
+ }
+
+ /// Upgrade (or "learn") the type of an instruction operand
+ /// This value must be compatible and at least as specific as the previously known type.
+ /// If this value originated from self, or an lvar, the learned type will be
+ /// propagated back to its source.
+ pub fn upgrade_opnd_type(&mut self, opnd: InsnOpnd, opnd_type: Type) {
+ // If type propagation is disabled, store no types
+ if get_option!(no_type_prop) {
+ return;
+ }
+
+ match opnd {
+ SelfOpnd => self.self_type.upgrade(opnd_type),
+ StackOpnd(idx) => {
+ let idx = idx as u16;
+ assert!(idx < self.stack_size);
+ let stack_idx = (self.stack_size - 1 - idx) as usize;
+
+ // If outside of tracked range, do nothing
+ if stack_idx >= MAX_TEMP_TYPES {
+ return;
+ }
+
+ let mapping = self.temp_mapping[stack_idx];
+
+ match mapping {
+ MapToSelf => self.self_type.upgrade(opnd_type),
+ MapToStack => self.temp_types[stack_idx].upgrade(opnd_type),
+ MapToLocal(idx) => {
+ let idx = idx as usize;
+ assert!(idx < MAX_LOCAL_TYPES);
+ self.local_types[idx].upgrade(opnd_type);
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ Get both the type and mapping (where the value originates) of an operand.
+    This can be used with stack_push_mapping or set_opnd_mapping to copy
+ a stack value's type while maintaining the mapping.
+ */
+ pub fn get_opnd_mapping(&self, opnd: InsnOpnd) -> (TempMapping, Type) {
+ let opnd_type = self.get_opnd_type(opnd);
+
+ match opnd {
+ SelfOpnd => (MapToSelf, opnd_type),
+ StackOpnd(idx) => {
+ let idx = idx as u16;
+ assert!(idx < self.stack_size);
+ let stack_idx = (self.stack_size - 1 - idx) as usize;
+
+ if stack_idx < MAX_TEMP_TYPES {
+ (self.temp_mapping[stack_idx], opnd_type)
+ } else {
+ // We can't know the source of this stack operand, so we assume it is
+                    // a stack-only temporary. Its type will be Type::Unknown.
+ assert!(opnd_type == Type::Unknown);
+ (MapToStack, opnd_type)
+ }
+ }
+ }
+ }
+
+ /// Overwrite both the type and mapping of a stack operand.
+ pub fn set_opnd_mapping(&mut self, opnd: InsnOpnd, (mapping, opnd_type): (TempMapping, Type)) {
+ match opnd {
+ SelfOpnd => unreachable!("self always maps to self"),
+ StackOpnd(idx) => {
+ assert!(idx < self.stack_size);
+ let stack_idx = (self.stack_size - 1 - idx) as usize;
+
+ // If type propagation is disabled, store no types
+ if get_option!(no_type_prop) {
+ return;
+ }
+
+ // If outside of tracked range, do nothing
+ if stack_idx >= MAX_TEMP_TYPES {
+ return;
+ }
+
+ self.temp_mapping[stack_idx] = mapping;
+
+                // Only used when mapping == MapToStack
+ self.temp_types[stack_idx] = opnd_type;
+ }
+ }
+ }
+
+ /// Set the type of a local variable
+ pub fn set_local_type(&mut self, local_idx: usize, local_type: Type) {
+ let ctx = self;
+
+ // If type propagation is disabled, store no types
+ if get_option!(no_type_prop) {
+ return;
+ }
+
+ if local_idx >= MAX_LOCAL_TYPES {
+ return;
+ }
+
+ // If any values on the stack map to this local we must detach them
+ for (i, mapping) in ctx.temp_mapping.iter_mut().enumerate() {
+ *mapping = match *mapping {
+ MapToStack => MapToStack,
+ MapToSelf => MapToSelf,
+ MapToLocal(idx) => {
+ if idx as usize == local_idx {
+ ctx.temp_types[i] = ctx.local_types[idx as usize];
+ MapToStack
+ } else {
+ MapToLocal(idx)
+ }
+ }
+ }
+ }
+
+ ctx.local_types[local_idx] = local_type;
+ }
+
+ /// Erase local variable type information
+ /// eg: because of a call we can't track
+ pub fn clear_local_types(&mut self) {
+ // When clearing local types we must detach any stack mappings to those
+ // locals. Even if local values may have changed, stack values will not.
+ for (i, mapping) in self.temp_mapping.iter_mut().enumerate() {
+ *mapping = match *mapping {
+ MapToStack => MapToStack,
+ MapToSelf => MapToSelf,
+ MapToLocal(idx) => {
+ self.temp_types[i] = self.local_types[idx as usize];
+ MapToStack
+ }
+ }
+ }
+
+ // Clear the local types
+ self.local_types = [Type::default(); MAX_LOCAL_TYPES];
+ }
+
+ /// Compute a difference score for two context objects
+ /// Returns 0 if the two contexts are the same
+ /// Returns > 0 if different but compatible
+ /// Returns usize::MAX if incompatible
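+    /// For example, two default contexts diff to 0 (see the tests at the bottom
+    /// of this file), while contexts with different stack_size or sp_offset are
+    /// incompatible (usize::MAX).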
+ pub fn diff(&self, dst: &Context) -> usize {
+ // Self is the source context (at the end of the predecessor)
+ let src = self;
+
+        // Can only look up the first version in the chain
+ if dst.chain_depth != 0 {
+ return usize::MAX;
+ }
+
+ // Blocks with depth > 0 always produce new versions
+ // Sidechains cannot overlap
+ if src.chain_depth != 0 {
+ return usize::MAX;
+ }
+
+ if dst.stack_size != src.stack_size {
+ return usize::MAX;
+ }
+
+ if dst.sp_offset != src.sp_offset {
+ return usize::MAX;
+ }
+
+ // Difference sum
+ let mut diff = 0;
+
+ // Check the type of self
+ let self_diff = src.self_type.diff(dst.self_type);
+
+ if self_diff == usize::MAX {
+ return usize::MAX;
+ }
+
+ diff += self_diff;
+
+ // For each local type we track
+ for i in 0..src.local_types.len() {
+ let t_src = src.local_types[i];
+ let t_dst = dst.local_types[i];
+ let temp_diff = t_src.diff(t_dst);
+
+ if temp_diff == usize::MAX {
+ return usize::MAX;
+ }
+
+ diff += temp_diff;
+ }
+
+ // For each value on the temp stack
+ for i in 0..src.stack_size {
+ let (src_mapping, src_type) = src.get_opnd_mapping(StackOpnd(i));
+ let (dst_mapping, dst_type) = dst.get_opnd_mapping(StackOpnd(i));
+
+ // If the two mappings aren't the same
+ if src_mapping != dst_mapping {
+ if dst_mapping == MapToStack {
+ // We can safely drop information about the source of the temp
+ // stack operand.
+ diff += 1;
+ } else {
+ return usize::MAX;
+ }
+ }
+
+ let temp_diff = src_type.diff(dst_type);
+
+ if temp_diff == usize::MAX {
+ return usize::MAX;
+ }
+
+ diff += temp_diff;
+ }
+
+ return diff;
+ }
+}
+
+impl BlockId {
+ /// Print Ruby source location for debugging
+ #[cfg(debug_assertions)]
+ pub fn dump_src_loc(&self) {
+ unsafe { rb_yjit_dump_iseq_loc(self.iseq, self.idx) }
+ }
+}
+
+/// See [gen_block_series_body]. This simply counts compilation failures.
+fn gen_block_series(
+ blockid: BlockId,
+ start_ctx: &Context,
+ ec: EcPtr,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> Option<BlockRef> {
+ let result = gen_block_series_body(blockid, start_ctx, ec, cb, ocb);
+ if result.is_none() {
+ incr_counter!(compilation_failure);
+ }
+
+ result
+}
+
+/// Immediately compile a series of block versions at a starting point and
+/// return the starting block.
+fn gen_block_series_body(
+ blockid: BlockId,
+ start_ctx: &Context,
+ ec: EcPtr,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> Option<BlockRef> {
+ // Keep track of all blocks compiled in this batch
+ const EXPECTED_BATCH_SIZE: usize = 4;
+ let mut batch = Vec::with_capacity(EXPECTED_BATCH_SIZE);
+
+ // Generate code for the first block
+ let first_block = gen_single_block(blockid, start_ctx, ec, cb, ocb).ok()?;
+ batch.push(first_block.clone()); // Keep track of this block version
+
+ // Add the block version to the VersionMap for this ISEQ
+ add_block_version(&first_block, cb);
+
+ // Loop variable
+ let mut last_blockref = first_block.clone();
+ loop {
+ // Get the last outgoing branch from the previous block.
+ let last_branchref = {
+ let last_block = last_blockref.borrow();
+ match last_block.outgoing.last() {
+ Some(branch) => branch.clone(),
+ None => {
+ break;
+ } // If last block has no branches, stop.
+ }
+ };
+ let mut last_branch = last_branchref.borrow_mut();
+
+ // gen_direct_jump() can request a block to be placed immediately after by
+ // leaving `None`s in the `dst_addrs` array.
+ match &last_branch.dst_addrs {
+ [None, None] => (),
+ _ => {
+ break;
+ } // If there is no next block to compile, stop
+ };
+
+ // Get id and context for the new block
+ let requested_id = last_branch.targets[0];
+ let requested_ctx = &last_branch.target_ctxs[0];
+ assert_ne!(
+ last_branch.targets[0], BLOCKID_NULL,
+ "block id must be filled"
+ );
+
+ // Generate new block using context from the last branch.
+ let result = gen_single_block(requested_id, requested_ctx, ec, cb, ocb);
+
+ // If the block failed to compile
+ if result.is_err() {
+ // Remove previously compiled block
+ // versions from the version map
+ for blockref in &batch {
+ // FIXME: should be deallocating resources here too
+ // e.g. invariants, etc.
+ //free_block(blockref)
+
+ remove_block_version(blockref);
+ }
+
+ // Stop compiling
+ return None;
+ }
+
+ let new_blockref = result.unwrap();
+
+ // Add the block version to the VersionMap for this ISEQ
+ add_block_version(&new_blockref, cb);
+
+ // Connect the last branch and the new block
+ last_branch.blocks[0] = Some(new_blockref.clone());
+ last_branch.dst_addrs[0] = new_blockref.borrow().start_addr;
+ new_blockref
+ .borrow_mut()
+ .incoming
+ .push(last_branchref.clone());
+
+ // This block should immediately follow the last branch
+ assert!(new_blockref.borrow().start_addr == last_branch.end_addr);
+
+ // Track the block
+ batch.push(new_blockref.clone());
+
+ // Repeat with newest block
+ last_blockref = new_blockref;
+ }
+
+ Some(first_block)
+}
+
+/// Generate a block version that is an entry point inserted into an iseq
+/// NOTE: this function assumes that the VM lock has been taken
+pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> {
+ // Compute the current instruction index based on the current PC
+ let insn_idx: u32 = unsafe {
+ let pc_zero = rb_iseq_pc_at_idx(iseq, 0);
+ let ec_pc = get_cfp_pc(get_ec_cfp(ec));
+ ec_pc.offset_from(pc_zero).try_into().ok()?
+ };
+
+ // The entry context makes no assumptions about types
+ let blockid = BlockId {
+ iseq,
+ idx: insn_idx,
+ };
+
+ // Get the inline and outlined code blocks
+ let cb = CodegenGlobals::get_inline_cb();
+ let ocb = CodegenGlobals::get_outlined_cb();
+
+ // Write the interpreter entry prologue. Might be NULL when out of memory.
+ let code_ptr = gen_entry_prologue(cb, iseq, insn_idx);
+
+ // Try to generate code for the entry block
+ let block = gen_block_series(blockid, &Context::default(), ec, cb, ocb);
+
+ cb.mark_all_executable();
+ ocb.unwrap().mark_all_executable();
+
+ match block {
+ // Compilation failed
+ None => return None,
+
+ // If the block contains no Ruby instructions
+ Some(block) => {
+ let block = block.borrow();
+ if block.end_idx == insn_idx {
+ return None;
+ }
+ }
+ }
+
+ // Compilation successful and block not empty
+ return code_ptr;
+}
+
+/// Generate code for a branch, possibly rewriting and changing the size of it
+fn regenerate_branch(cb: &mut CodeBlock, branch: &mut Branch) {
+ // FIXME
+ /*
+ if (branch->start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
+ // Generating this branch would modify frozen bytes. Do nothing.
+ return;
+ }
+ */
+
+ let old_write_pos = cb.get_write_pos();
+
+ let mut block = branch.block.borrow_mut();
+ let branch_terminates_block = branch.end_addr == block.end_addr;
+
+ // Rewrite the branch
+ assert!(branch.dst_addrs[0].is_some());
+ cb.set_write_ptr(branch.start_addr.unwrap());
+ (branch.gen_fn)(
+ cb,
+ branch.dst_addrs[0].unwrap(),
+ branch.dst_addrs[1],
+ branch.shape,
+ );
+ branch.end_addr = Some(cb.get_write_ptr());
+
+ // The block may have shrunk after the branch is rewritten
+ if branch_terminates_block {
+ // Adjust block size
+ block.end_addr = branch.end_addr;
+ }
+
+    // cb.write_pos is both a write cursor and a marker for the end of
+    // everything written out so far. Leave cb.write_pos at the end of the
+    // block before returning. This function only ever bumps or retains the end
+    // of block marker since that's what the majority of callers want. When the
+    // branch sits at the very end of the codeblock and it shrinks after
+    // regeneration, it's up to the caller to drop bytes off the end to
+    // not leave a gap and to implement branch.shape.
+ if old_write_pos > cb.get_write_pos() {
+ // We rewound cb->write_pos to generate the branch, now restore it.
+ cb.set_pos(old_write_pos);
+ } else {
+ // The branch sits at the end of cb and consumed some memory.
+ // Keep cb.write_pos.
+ }
+}
+
+/// Create a new outgoing branch entry for a block
+fn make_branch_entry(block: BlockRef, src_ctx: &Context, gen_fn: BranchGenFn) -> BranchRef {
+ let branch = Branch {
+ // Block this is attached to
+ block: block.clone(),
+
+ // Positions where the generated code starts and ends
+ start_addr: None,
+ end_addr: None,
+
+ // Context right after the branch instruction
+ src_ctx: *src_ctx,
+
+ // Branch target blocks and their contexts
+ targets: [BLOCKID_NULL, BLOCKID_NULL],
+ target_ctxs: [Context::default(), Context::default()],
+ blocks: [None, None],
+
+ // Jump target addresses
+ dst_addrs: [None, None],
+
+ // Branch code generation function
+ gen_fn: gen_fn,
+
+ // Shape of the branch
+ shape: BranchShape::Default,
+ };
+
+ // Add to the list of outgoing branches for the block
+ let branchref = Rc::new(RefCell::new(branch));
+ block.borrow_mut().outgoing.push(branchref.clone());
+
+ return branchref;
+}
+
+/// Generated code calls this function with the SysV calling convention.
+/// See [get_branch_target].
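+/// The `branch_ptr` and `target_idx` arguments correspond to the values that the
+/// stub emitted in [get_branch_target] loads into C_ARG_REGS before calling here.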
+extern "sysv64" fn branch_stub_hit(
+ branch_ptr: *const c_void,
+ target_idx: u32,
+ ec: EcPtr,
+) -> *const u8 {
+ with_vm_lock(src_loc!(), || {
+ branch_stub_hit_body(branch_ptr, target_idx, ec)
+ })
+}
+
+/// Called by the generated code when a branch stub is executed
+/// Triggers compilation of branches and code patching
+fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -> *const u8 {
+ assert!(!branch_ptr.is_null());
+
+    // branch_ptr is actually a *const RefCell<Branch>
+ let branch_rc = unsafe { BranchRef::from_raw(branch_ptr as *const RefCell<Branch>) };
+
+ // We increment the strong count because we want to keep the reference owned
+ // by the branch stub alive. Return branch stubs can be hit multiple times.
+ unsafe { Rc::increment_strong_count(branch_ptr) };
+
+ let mut branch = branch_rc.borrow_mut();
+ let branch_size_on_entry = branch.code_size();
+
+ let target_idx: usize = target_idx.as_usize();
+ let target = branch.targets[target_idx];
+ let target_ctx = branch.target_ctxs[target_idx];
+
+ let target_branch_shape = match target_idx {
+ 0 => BranchShape::Next0,
+ 1 => BranchShape::Next1,
+ _ => unreachable!("target_idx < 2 must always hold"),
+ };
+
+ let cb = CodegenGlobals::get_inline_cb();
+ let ocb = CodegenGlobals::get_outlined_cb();
+
+ // If this branch has already been patched, return the dst address
+ // Note: ractors can cause the same stub to be hit multiple times
+ if let Some(_) = branch.blocks[target_idx] {
+ return branch.dst_addrs[target_idx].unwrap().raw_ptr();
+ }
+
+ let (cfp, original_interp_sp) = unsafe {
+ let cfp = get_ec_cfp(ec);
+ let original_interp_sp = get_cfp_sp(cfp);
+
+ let reconned_pc = rb_iseq_pc_at_idx(rb_cfp_get_iseq(cfp), target.idx);
+ let reconned_sp = original_interp_sp.offset(target_ctx.sp_offset.into());
+
+ // Update the PC in the current CFP, because it may be out of sync in JITted code
+ rb_set_cfp_pc(cfp, reconned_pc);
+
+ // :stub-sp-flush:
+        // Generated code does stack operations without modifying cfp->sp, while
+        // cfp->sp tells the GC which values on the stack to root. Generated code
+        // generally takes care of updating cfp->sp when it calls runtime routines that
+        // could trigger GC, but it's inconvenient to do that before calling this function.
+        // So we do it here instead.
+ rb_set_cfp_sp(cfp, reconned_sp);
+
+ (cfp, original_interp_sp)
+ };
+
+ // Try to find an existing compiled version of this block
+ let mut block = find_block_version(target, &target_ctx);
+
+ // If this block hasn't yet been compiled
+ if block.is_none() {
+ let branch_old_shape = branch.shape;
+ let mut branch_modified = false;
+
+ // If the new block can be generated right after the branch (at cb->write_pos)
+ if Some(cb.get_write_ptr()) == branch.end_addr {
+ // This branch should be terminating its block
+ assert!(branch.end_addr == branch.block.borrow().end_addr);
+
+ // Change the branch shape to indicate the target block will be placed next
+ branch.shape = target_branch_shape;
+
+ // Rewrite the branch with the new, potentially more compact shape
+ regenerate_branch(cb, &mut branch);
+ branch_modified = true;
+
+ // Ensure that the branch terminates the codeblock just like
+ // before entering this if block. This drops bytes off the end
+ // in case we shrank the branch when regenerating.
+ cb.set_write_ptr(branch.end_addr.unwrap());
+ }
+
+ // Compile the new block version
+ drop(branch); // Stop mutable RefCell borrow since GC might borrow branch for marking
+ block = gen_block_series(target, &target_ctx, ec, cb, ocb);
+ branch = branch_rc.borrow_mut();
+
+ if block.is_none() && branch_modified {
+ // We couldn't generate a new block for the branch, but we modified the branch.
+ // Restore the branch by regenerating it.
+ branch.shape = branch_old_shape;
+ regenerate_branch(cb, &mut branch);
+ }
+ }
+
+ // Finish building the new block
+ let dst_addr = match block {
+ Some(block_rc) => {
+ let mut block: RefMut<_> = block_rc.borrow_mut();
+
+ // Branch shape should reflect layout
+ assert!(!(branch.shape == target_branch_shape && block.start_addr != branch.end_addr));
+
+ // Add this branch to the list of incoming branches for the target
+ block.incoming.push(branch_rc.clone());
+
+ // Update the branch target address
+ let dst_addr = block.start_addr;
+ branch.dst_addrs[target_idx] = dst_addr;
+
+ // Mark this branch target as patched (no longer a stub)
+ branch.blocks[target_idx] = Some(block_rc.clone());
+
+ // Rewrite the branch with the new jump target address
+ mem::drop(block); // end mut borrow
+ regenerate_branch(cb, &mut branch);
+
+ // Restore interpreter sp, since the code hitting the stub expects the original.
+ unsafe { rb_set_cfp_sp(cfp, original_interp_sp) };
+
+ block_rc.borrow().start_addr.unwrap()
+ }
+ None => {
+ // Failed to service the stub by generating a new block so now we
+ // need to exit to the interpreter at the stubbed location. We are
+ // intentionally *not* restoring original_interp_sp. At the time of
+ // writing, reconstructing interpreter state only involves setting
+ // cfp->sp and cfp->pc. We set both before trying to generate the
+ // block. All there is left to do to exit is to pop the native
+ // frame. We do that in code_for_exit_from_stub.
+ CodegenGlobals::get_stub_exit_code()
+ }
+ };
+
+ ocb.unwrap().mark_all_executable();
+ cb.mark_all_executable();
+
+ let new_branch_size = branch.code_size();
+ assert!(
+ new_branch_size <= branch_size_on_entry,
+ "branch stubs should never enlarge branches"
+ );
+
+ // Return a pointer to the compiled block version
+ dst_addr.raw_ptr()
+}
+
+/// Get a block version or stub corresponding to a branch target
+fn get_branch_target(
+ target: BlockId,
+ ctx: &Context,
+ branchref: &BranchRef,
+ target_idx: u32,
+ ocb: &mut OutlinedCb,
+) -> Option<CodePtr> {
+ let maybe_block = find_block_version(target, ctx);
+
+ // If the block already exists
+ if let Some(blockref) = maybe_block {
+ let mut block = blockref.borrow_mut();
+
+ // Add an incoming branch into this block
+ block.incoming.push(branchref.clone());
+ let mut branch = branchref.borrow_mut();
+ branch.blocks[target_idx.as_usize()] = Some(blockref.clone());
+
+ // Return a pointer to the compiled code for the block
+ return block.start_addr;
+ }
+
+ let ocb = ocb.unwrap();
+
+ // Generate an outlined stub that will call branch_stub_hit()
+ let stub_addr = ocb.get_write_ptr();
+
+ // Get a raw pointer to the branch while keeping the reference count alive
+ // Here clone increments the strong count by 1
+ // This means the branch stub owns its own reference to the branch
+ let branch_ptr: *const RefCell<Branch> = BranchRef::into_raw(branchref.clone());
+
+    // Call branch_stub_hit(branch_ptr, target_idx, ec)
+ mov(ocb, C_ARG_REGS[2], REG_EC);
+ mov(ocb, C_ARG_REGS[1], uimm_opnd(target_idx as u64));
+ mov(ocb, C_ARG_REGS[0], const_ptr_opnd(branch_ptr as *const u8));
+ call_ptr(ocb, REG0, branch_stub_hit as *mut u8);
+
+ // Jump to the address returned by the
+ // branch_stub_hit call
+ jmp_rm(ocb, RAX);
+
+ if ocb.has_dropped_bytes() {
+ None // No space
+ } else {
+ Some(stub_addr)
+ }
+}
+
+pub fn gen_branch(
+ jit: &JITState,
+ src_ctx: &Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ target0: BlockId,
+ ctx0: &Context,
+ target1: Option<BlockId>,
+ ctx1: Option<&Context>,
+ gen_fn: BranchGenFn,
+) {
+ assert!(target0 != BLOCKID_NULL);
+
+ let branchref = make_branch_entry(jit.get_block(), src_ctx, gen_fn);
+
+ // Get the branch targets or stubs
+ let dst_addr0 = get_branch_target(target0, ctx0, &branchref, 0, ocb);
+ let dst_addr1 = if ctx1.is_some() {
+ get_branch_target(target1.unwrap(), ctx1.unwrap(), &branchref, 1, ocb)
+ } else {
+ None
+ };
+
+ let mut branch = branchref.borrow_mut();
+
+    // Set the branch target addresses
+ branch.dst_addrs[0] = dst_addr0;
+ branch.dst_addrs[1] = dst_addr1;
+
+ branch.targets[0] = target0;
+ if target1.is_some() {
+ branch.targets[1] = target1.unwrap();
+ }
+ branch.target_ctxs[0] = *ctx0;
+ branch.target_ctxs[1] = if ctx1.is_some() {
+ *ctx1.unwrap()
+ } else {
+ Context::default()
+ };
+
+ // Call the branch generation function
+ branch.start_addr = Some(cb.get_write_ptr());
+ regenerate_branch(cb, &mut branch);
+}
+
+fn gen_jump_branch(
+ cb: &mut CodeBlock,
+ target0: CodePtr,
+ _target1: Option<CodePtr>,
+ shape: BranchShape,
+) {
+ if shape == BranchShape::Next1 {
+ panic!("Branch shape Next1 not allowed in gen_jump_branch!");
+ }
+
+ if shape == BranchShape::Default {
+ jmp_ptr(cb, target0);
+ }
+}
+
+pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, cb: &mut CodeBlock) {
+ assert!(target0 != BLOCKID_NULL);
+
+ let branchref = make_branch_entry(jit.get_block(), ctx, gen_jump_branch);
+ let mut branch = branchref.borrow_mut();
+
+ branch.targets[0] = target0;
+ branch.target_ctxs[0] = *ctx;
+
+ let maybe_block = find_block_version(target0, ctx);
+
+ // If the block already exists
+ if let Some(blockref) = maybe_block {
+ let mut block = blockref.borrow_mut();
+
+ block.incoming.push(branchref.clone());
+
+ branch.dst_addrs[0] = block.start_addr;
+ branch.blocks[0] = Some(blockref.clone());
+ branch.shape = BranchShape::Default;
+
+ // Call the branch generation function
+ branch.start_addr = Some(cb.get_write_ptr());
+ gen_jump_branch(cb, branch.dst_addrs[0].unwrap(), None, BranchShape::Default);
+ branch.end_addr = Some(cb.get_write_ptr());
+ } else {
+ // This None target address signals gen_block_series() to compile the
+ // target block right after this one (fallthrough).
+ branch.dst_addrs[0] = None;
+ branch.shape = BranchShape::Next0;
+ branch.start_addr = Some(cb.get_write_ptr());
+ branch.end_addr = Some(cb.get_write_ptr());
+ }
+}
+
+/// Create a stub to force the code up to this point to be executed
+pub fn defer_compilation(
+ jit: &JITState,
+ cur_ctx: &Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) {
+ if cur_ctx.chain_depth != 0 {
+ panic!("Double defer!");
+ }
+
+ let mut next_ctx = cur_ctx.clone();
+
+ if next_ctx.chain_depth >= u8::MAX {
+ panic!("max block version chain depth reached!");
+ }
+
+ next_ctx.chain_depth += 1;
+
+ let block_rc = jit.get_block();
+ let branch_rc = make_branch_entry(jit.get_block(), cur_ctx, gen_jump_branch);
+ let mut branch = branch_rc.borrow_mut();
+ let block = block_rc.borrow();
+
+ branch.target_ctxs[0] = next_ctx;
+ branch.targets[0] = BlockId {
+ iseq: block.blockid.iseq,
+ idx: jit.get_insn_idx(),
+ };
+ branch.dst_addrs[0] = get_branch_target(branch.targets[0], &next_ctx, &branch_rc, 0, ocb);
+
+ // Call the branch generation function
+ branch.start_addr = Some(cb.get_write_ptr());
+ gen_jump_branch(cb, branch.dst_addrs[0].unwrap(), None, BranchShape::Default);
+ branch.end_addr = Some(cb.get_write_ptr());
+}
+
+// Remove all references to a block then free it.
+fn free_block(blockref: &BlockRef) {
+ use crate::invariants::*;
+
+ block_assumptions_free(blockref);
+
+ let block = blockref.borrow();
+
+ // Remove this block from the predecessor's targets
+ for pred_branchref in &block.incoming {
+ // Branch from the predecessor to us
+ let mut pred_branch = pred_branchref.borrow_mut();
+
+ // If this is us, nullify the target block
+ for pred_succ_ref in &mut pred_branch.blocks {
+ if let Some(pred_succ) = pred_succ_ref {
+ if pred_succ == blockref {
+ *pred_succ_ref = None;
+ }
+ }
+ }
+ }
+
+ // For each outgoing branch
+ for out_branchref in &block.outgoing {
+ let out_branch = out_branchref.borrow();
+
+ // For each successor block
+ for succ in &out_branch.blocks {
+ if let Some(succ) = succ {
+ // Remove outgoing branch from the successor's incoming list
+ let mut succ_block = succ.borrow_mut();
+ succ_block
+ .incoming
+ .retain(|succ_incoming| !Rc::ptr_eq(succ_incoming, out_branchref));
+ }
+ }
+ }
+
+ // No explicit deallocation here as blocks are ref-counted.
+}
+
+// Some runtime checks for integrity of a program location
+pub fn verify_blockid(blockid: BlockId) {
+ unsafe {
+ assert!(rb_IMEMO_TYPE_P(blockid.iseq.into(), imemo_iseq) != 0);
+ assert!(blockid.idx < get_iseq_encoded_size(blockid.iseq));
+ }
+}
+
+// Invalidate one specific block version
+pub fn invalidate_block_version(blockref: &BlockRef) {
+ //ASSERT_vm_locking();
+
+ // TODO: want to assert that all other ractors are stopped here. Can't patch
+ // machine code that some other thread is running.
+
+ let block = blockref.borrow();
+ let cb = CodegenGlobals::get_inline_cb();
+ let ocb = CodegenGlobals::get_outlined_cb();
+
+ verify_blockid(block.blockid);
+
+ // Remove this block from the version array
+ remove_block_version(blockref);
+
+ // Get a pointer to the generated code for this block
+ let code_ptr = block.start_addr;
+
+    // Make the start of the block do an exit. This handles OOM situations
+    // and some cases where we can't efficiently patch incoming branches.
+    // Do this first, since in case there is a fallthrough branch into this
+    // block, the patching loop below can overwrite the start of the block.
+    // In those situations, there are hopefully no jumps to the start of the block
+    // after patching, as the start of the block would be in the middle of something
+    // generated by branch.gen_fn.
+ {
+ let block_start = block
+ .start_addr
+ .expect("invalidation needs constructed block");
+ let block_end = block
+ .end_addr
+ .expect("invalidation needs constructed block");
+ let block_entry_exit = block
+ .entry_exit
+ .expect("invalidation needs the entry_exit field");
+
+ if block_start == block_entry_exit {
+ // Some blocks exit on entry. Patching a jump to the entry at the
+ // entry makes an infinite loop.
+ } else {
+ // TODO(alan)
+ // if (block.start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) // Don't patch frozen code region
+
+ // Patch in a jump to block.entry_exit.
+ let cur_pos = cb.get_write_ptr();
+ cb.set_write_ptr(block_start);
+ jmp_ptr(cb, block_entry_exit);
+ assert!(
+ cb.get_write_ptr() < block_end,
+ "invalidation wrote past end of block"
+ );
+ cb.set_write_ptr(cur_pos);
+ }
+ }
+
+ // For each incoming branch
+ for branchref in &block.incoming {
+ let mut branch = branchref.borrow_mut();
+ let target_idx = if branch.dst_addrs[0] == code_ptr {
+ 0
+ } else {
+ 1
+ };
+ assert_eq!(branch.dst_addrs[target_idx], code_ptr);
+ assert_eq!(blockref, branch.blocks[target_idx].as_ref().unwrap());
+
+ // Mark this target as being a stub
+ branch.blocks[target_idx] = None;
+
+ // TODO(alan):
+ // Don't patch frozen code region
+ // if (branch.start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
+ // continue;
+ // }
+
+ // Create a stub for this branch target
+ mem::drop(branch); // end RefCell borrow as get_branch_target() can borrow the branch.
+ let mut branch_target =
+ get_branch_target(block.blockid, &block.ctx, branchref, target_idx as u32, ocb);
+
+ if branch_target.is_none() {
+ // We were unable to generate a stub (e.g. OOM). Use the block's
+ // exit instead of a stub for the block. It's important that we
+ // still patch the branch in this situation so stubs are unique
+ // to branches. Think about what could go wrong if we run out of
+ // memory in the middle of this loop.
+ branch_target = block.entry_exit;
+ }
+
+ branch = branchref.borrow_mut();
+ branch.dst_addrs[target_idx] = branch_target;
+
+ // Check if the invalidated block immediately follows
+ let target_next = block.start_addr == branch.end_addr;
+
+ if target_next {
+ // The new block will no longer be adjacent.
+ // Note that we could be enlarging the branch and writing into the
+ // start of the block being invalidated.
+ branch.shape = BranchShape::Default;
+ }
+
+ // Rewrite the branch with the new jump target address
+ regenerate_branch(cb, &mut branch);
+
+ if target_next && branch.end_addr > block.end_addr {
+ dbg!(
+ branch.block.borrow().blockid.idx,
+ block.blockid.idx,
+ branch.end_addr,
+ block.end_addr,
+ block.code_size()
+ );
+ panic!("yjit invalidate rewrote branch past end of invalidated block");
+ }
+ }
+
+ // Clear out the JIT func so that we can recompile later and so the
+ // interpreter will run the iseq.
+ //
+ // Only clear the jit_func when we're invalidating the JIT entry block.
+ // We only support compiling iseqs from index 0 right now. So entry
+ // points will always have an instruction index of 0. We'll need to
+ // change this in the future when we support optional parameters because
+ // they enter the function with a non-zero PC
+ if block.blockid.idx == 0 {
+ unsafe { rb_iseq_reset_jit_func(block.blockid.iseq) };
+ }
+
+ // TODO:
+ // May want to recompile a new entry point (for interpreter entry blocks)
+ // This isn't necessary for correctness
+
+ // FIXME:
+ // Call continuation addresses on the stack can also be atomically replaced by jumps going to the stub.
+
+ free_block(blockref);
+
+ ocb.unwrap().mark_all_executable();
+ cb.mark_all_executable();
+
+ incr_counter!(invalidation_count);
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::core::*;
+
+ #[test]
+ fn types() {
+ // Valid src => dst
+ assert_eq!(Type::Unknown.diff(Type::Unknown), 0);
+ assert_eq!(Type::UnknownImm.diff(Type::UnknownImm), 0);
+ assert_ne!(Type::UnknownImm.diff(Type::Unknown), usize::MAX);
+ assert_ne!(Type::Fixnum.diff(Type::Unknown), usize::MAX);
+ assert_ne!(Type::Fixnum.diff(Type::UnknownImm), usize::MAX);
+
+ // Invalid src => dst
+ assert_eq!(Type::Unknown.diff(Type::UnknownImm), usize::MAX);
+ assert_eq!(Type::Unknown.diff(Type::Fixnum), usize::MAX);
+ assert_eq!(Type::Fixnum.diff(Type::UnknownHeap), usize::MAX);
+ }
+
+ #[test]
+ fn context() {
+ // Valid src => dst
+ assert_eq!(Context::default().diff(&Context::default()), 0);
+
+ // Try pushing an operand and getting its type
+ let mut ctx = Context::default();
+ ctx.stack_push(Type::Fixnum);
+ let top_type = ctx.get_opnd_type(StackOpnd(0));
+ assert!(top_type == Type::Fixnum);
+
+ // TODO: write more tests for Context type diff
+ }
+}
diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs
new file mode 100644
index 0000000000..9e386ea871
--- /dev/null
+++ b/yjit/src/cruby.rs
@@ -0,0 +1,919 @@
+//! This module deals with making relevant C functions available to Rust YJIT.
+//! Some C functions we use we maintain, some are public C extension APIs,
+//! some are internal CRuby APIs.
+//!
+//! ## General notes about linking
+//!
+//! The YJIT crate compiles to a native static library, which for our purposes
+//! we can understand as a collection of object files. On ELF platforms at least,
+//! object files can refer to "external symbols" which we could take some
+//! liberty and understand as assembly labels that refer to code defined in other
+//! object files resolved when linking. When we are linking, say to produce miniruby,
+//! the linker resolves and put concrete addresses for each usage of C function in
+//! the Rust static library.
+//!
+//! By declaring external functions and using them, we are asserting the symbols
+//! we use have a definition in one of the object files we pass to the linker. Declaring
+//! a function here that has no definition anywhere causes a linking error.
+//!
+//! There are more things going on during linking and this section makes a lot of
+//! simplifications but hopefully this gives a good enough working mental model.
+//!
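+//! As a minimal sketch of the pattern (not the actual declarations used by YJIT),
+//! calling into a C function from Rust looks roughly like this:
+//!
+//! ```ignore
+//! extern "C" {
+//!     // Declared here, defined in one of CRuby's object files,
+//!     // and resolved by the linker.
+//!     fn rb_gc_mark_movable(obj: VALUE);
+//! }
+//! ```
+//!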
+//! ## Difference from example in the Rustonomicon
+//!
+//! You might be wondering about why this is different from the [FFI example]
+//! in the Nomicon, an official book about Unsafe Rust.
+//!
+//! There is no `#[link]` attribute because we are not linking against an external
+//! library, but rather implicitly asserting that we'll supply a concrete definition
+//! for all C functions we call, similar to how pure C projects put functions
+//! across different compilation units and link them together.
+//!
+//! TODO(alan): is the model different enough on Windows that this setup is unworkable?
+//! Seems prudent to at least learn more about Windows binary tooling before
+//! committing to a design.
+//!
+//! Alan recommends reading the Nomicon cover to cover as it's not very long,
+//! especially for something that can save hours of debugging Undefined Behavior (UB)
+//! down the road.
+//!
+//! UB can cause Safe Rust to crash, at which point it's hard to tell which
+//! usage of `unsafe` in the codebase invokes it. Providing a safe Rust interface
+//! that wraps `unsafe` Rust is a good technique, but it requires practice and
+//! knowledge about what's well defined and what's undefined.
+//!
+//! For an extremely advanced example of building safe primitives using Unsafe Rust,
+//! see the [GhostCell] paper. Some parts of the paper assume less background knowledge
+//! than other parts, so there should be learning opportunities in it for all experience
+//! levels.
+//!
+//! ## Binding generation
+//!
+//! For the moment, declarations on the Rust side are hand written. The code is boilerplate
+//! and could be generated automatically with custom tooling that depends on
+//! rust-lang/rust-bindgen. The output Rust code could be checked in to version control
+//! and verified on CI like `make update-deps`.
+//!
+//! Upsides for this design:
+//! - the YJIT static lib that links with miniruby and friends will not need bindgen
+//! as a dependency at all. This is an important property so Ruby end users can
+//! build a YJIT enabled Ruby with no internet connection using a release tarball
+//! - Less hand-typed boilerplate
+//! - Helps reduce the risk of C definitions and Rust declarations going out of sync,
+//!    since CI verifies that they match
+//!
+//! Downsides and known unknowns:
+//! - Using rust-bindgen this way seems unusual. We might be depending on parts
+//! that the project is not committed to maintaining
+//! - This setup assumes rust-bindgen gives deterministic output, which can't be taken
+//! for granted
+//! - YJIT contributors will need to install libclang on their system to get rust-bindgen
+//! to work if they want to run the generation tool locally
+//!
+//! The elephant in the room is that we'll still need to use Unsafe Rust to call C functions,
+//! and the binding generation can't magically save us from learning Unsafe Rust.
+//!
+//!
+//! [FFI example]: https://doc.rust-lang.org/nomicon/ffi.html
+//! [GhostCell]: http://plv.mpi-sws.org/rustbelt/ghostcell/
+
+// CRuby types use snake_case. Allow them so we use one name across languages.
+#![allow(non_camel_case_types)]
+// A lot of imported CRuby globals aren't all-caps
+#![allow(non_upper_case_globals)]
+
+use std::convert::From;
+use std::ffi::CString;
+use std::os::raw::{c_char, c_int, c_long, c_uint, c_void};
+use std::panic::{catch_unwind, UnwindSafe};
+
+// We check that we can do this with the configure script and a couple of
+// static asserts. u64 and not usize to play nice with lowering to x86.
+pub type size_t = u64;
+
+/// A type alias for the redefinition flags coming from CRuby. These are just
+/// shifted 1s but not explicitly an enum.
+pub type RedefinitionFlag = u32;
+
+// Textually include output from rust-bindgen as suggested by its user guide.
+include!("cruby_bindings.inc.rs");
+
+// TODO: For #defines that affect memory layout, we need to check for them
+// on build and fail if they're wrong. e.g. USE_FLONUM *must* be true.
+
+// TODO:
+// Temporary, these external bindings will likely be auto-generated
+// and textually included in this file
+extern "C" {
+ #[link_name = "rb_yjit_alloc_exec_mem"] // we can rename functions with this attribute
+ pub fn alloc_exec_mem(mem_size: u32) -> *mut u8;
+
+ #[link_name = "rb_insn_name"]
+ pub fn raw_insn_name(insn: VALUE) -> *const c_char;
+
+ #[link_name = "rb_insn_len"]
+ pub fn raw_insn_len(v: VALUE) -> c_int;
+
+ #[link_name = "rb_yarv_class_of"]
+ pub fn CLASS_OF(v: VALUE) -> VALUE;
+
+ #[link_name = "rb_get_ec_cfp"]
+ pub fn get_ec_cfp(ec: EcPtr) -> CfpPtr;
+
+ #[link_name = "rb_get_cfp_pc"]
+ pub fn get_cfp_pc(cfp: CfpPtr) -> *mut VALUE;
+
+ #[link_name = "rb_get_cfp_sp"]
+ pub fn get_cfp_sp(cfp: CfpPtr) -> *mut VALUE;
+
+ #[link_name = "rb_get_cfp_self"]
+ pub fn get_cfp_self(cfp: CfpPtr) -> VALUE;
+
+ #[link_name = "rb_get_cfp_ep"]
+ pub fn get_cfp_ep(cfp: CfpPtr) -> *mut VALUE;
+
+ #[link_name = "rb_get_cme_def_type"]
+ pub fn get_cme_def_type(cme: *const rb_callable_method_entry_t) -> rb_method_type_t;
+
+ #[link_name = "rb_get_cme_def_method_serial"]
+ pub fn get_cme_def_method_serial(cme: *const rb_callable_method_entry_t) -> u64;
+
+ #[link_name = "rb_get_cme_def_body_attr_id"]
+ pub fn get_cme_def_body_attr_id(cme: *const rb_callable_method_entry_t) -> ID;
+
+ #[link_name = "rb_get_cme_def_body_optimized_type"]
+ pub fn get_cme_def_body_optimized_type(
+ cme: *const rb_callable_method_entry_t,
+ ) -> method_optimized_type;
+
+ #[link_name = "rb_get_cme_def_body_optimized_index"]
+ pub fn get_cme_def_body_optimized_index(cme: *const rb_callable_method_entry_t) -> c_uint;
+
+ #[link_name = "rb_get_cme_def_body_cfunc"]
+ pub fn get_cme_def_body_cfunc(cme: *const rb_callable_method_entry_t)
+ -> *mut rb_method_cfunc_t;
+
+ #[link_name = "rb_get_def_method_serial"]
+ /// While this returns a uintptr_t in C, we always use it as a Rust u64
+ pub fn get_def_method_serial(def: *const rb_method_definition_t) -> u64;
+
+ #[link_name = "rb_get_def_original_id"]
+ pub fn get_def_original_id(def: *const rb_method_definition_t) -> ID;
+
+ #[link_name = "rb_get_mct_argc"]
+ pub fn get_mct_argc(mct: *const rb_method_cfunc_t) -> c_int;
+
+ #[link_name = "rb_get_mct_func"]
+ pub fn get_mct_func(mct: *const rb_method_cfunc_t) -> *const u8;
+
+ #[link_name = "rb_get_def_iseq_ptr"]
+ pub fn get_def_iseq_ptr(def: *const rb_method_definition_t) -> IseqPtr;
+
+ #[link_name = "rb_iseq_encoded_size"]
+ pub fn get_iseq_encoded_size(iseq: IseqPtr) -> c_uint;
+
+ #[link_name = "rb_get_iseq_body_local_iseq"]
+ pub fn get_iseq_body_local_iseq(iseq: IseqPtr) -> IseqPtr;
+
+ #[link_name = "rb_get_iseq_body_iseq_encoded"]
+ pub fn get_iseq_body_iseq_encoded(iseq: IseqPtr) -> *mut VALUE;
+
+ #[link_name = "rb_get_iseq_body_builtin_inline_p"]
+ pub fn get_iseq_body_builtin_inline_p(iseq: IseqPtr) -> bool;
+
+ #[link_name = "rb_get_iseq_body_stack_max"]
+ pub fn get_iseq_body_stack_max(iseq: IseqPtr) -> c_uint;
+
+ #[link_name = "rb_get_iseq_flags_has_opt"]
+ pub fn get_iseq_flags_has_opt(iseq: IseqPtr) -> bool;
+
+ #[link_name = "rb_get_iseq_flags_has_kw"]
+ pub fn get_iseq_flags_has_kw(iseq: IseqPtr) -> bool;
+
+ #[link_name = "rb_get_iseq_flags_has_rest"]
+ pub fn get_iseq_flags_has_rest(iseq: IseqPtr) -> bool;
+
+ #[link_name = "rb_get_iseq_flags_has_post"]
+ pub fn get_iseq_flags_has_post(iseq: IseqPtr) -> bool;
+
+ #[link_name = "rb_get_iseq_flags_has_kwrest"]
+ pub fn get_iseq_flags_has_kwrest(iseq: IseqPtr) -> bool;
+
+ #[link_name = "rb_get_iseq_flags_has_block"]
+ pub fn get_iseq_flags_has_block(iseq: IseqPtr) -> bool;
+
+ #[link_name = "rb_get_iseq_flags_has_accepts_no_kwarg"]
+ pub fn get_iseq_flags_has_accepts_no_kwarg(iseq: IseqPtr) -> bool;
+
+ #[link_name = "rb_get_iseq_body_local_table_size"]
+ pub fn get_iseq_body_local_table_size(iseq: IseqPtr) -> c_uint;
+
+ #[link_name = "rb_get_iseq_body_param_keyword"]
+ pub fn get_iseq_body_param_keyword(iseq: IseqPtr) -> *const rb_seq_param_keyword_struct;
+
+ #[link_name = "rb_get_iseq_body_param_size"]
+ pub fn get_iseq_body_param_size(iseq: IseqPtr) -> c_uint;
+
+ #[link_name = "rb_get_iseq_body_param_lead_num"]
+ pub fn get_iseq_body_param_lead_num(iseq: IseqPtr) -> c_int;
+
+ #[link_name = "rb_get_iseq_body_param_opt_num"]
+ pub fn get_iseq_body_param_opt_num(iseq: IseqPtr) -> c_int;
+
+ #[link_name = "rb_get_iseq_body_param_opt_table"]
+ pub fn get_iseq_body_param_opt_table(iseq: IseqPtr) -> *const VALUE;
+
+ #[link_name = "rb_get_cikw_keyword_len"]
+ pub fn get_cikw_keyword_len(cikw: *const rb_callinfo_kwarg) -> c_int;
+
+ #[link_name = "rb_get_cikw_keywords_idx"]
+ pub fn get_cikw_keywords_idx(cikw: *const rb_callinfo_kwarg, idx: c_int) -> VALUE;
+
+ #[link_name = "rb_get_call_data_ci"]
+ pub fn get_call_data_ci(cd: *const rb_call_data) -> *const rb_callinfo;
+
+ #[link_name = "rb_yarv_str_eql_internal"]
+ pub fn rb_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE;
+
+ #[link_name = "rb_yarv_ary_entry_internal"]
+ pub fn rb_ary_entry_internal(ary: VALUE, offset: c_long) -> VALUE;
+
+ #[link_name = "rb_FL_TEST"]
+ pub fn FL_TEST(obj: VALUE, flags: VALUE) -> VALUE;
+
+ #[link_name = "rb_FL_TEST_RAW"]
+ pub fn FL_TEST_RAW(obj: VALUE, flags: VALUE) -> VALUE;
+
+ #[link_name = "rb_RB_TYPE_P"]
+ pub fn RB_TYPE_P(obj: VALUE, t: ruby_value_type) -> bool;
+
+ #[link_name = "rb_BASIC_OP_UNREDEFINED_P"]
+ pub fn BASIC_OP_UNREDEFINED_P(bop: ruby_basic_operators, klass: RedefinitionFlag) -> bool;
+
+ #[link_name = "rb_RSTRUCT_LEN"]
+ pub fn RSTRUCT_LEN(st: VALUE) -> c_long;
+
+ #[link_name = "rb_RSTRUCT_SET"]
+ pub fn RSTRUCT_SET(st: VALUE, k: c_int, v: VALUE);
+
+ // Ruby only defines these in vm_insnhelper.c, not in any header.
+ // Parsing it would result in a lot of duplicate definitions.
+ pub fn rb_vm_opt_mod(recv: VALUE, obj: VALUE) -> VALUE;
+ pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE;
+ pub fn rb_vm_defined(
+ ec: EcPtr,
+ reg_cfp: CfpPtr,
+ op_type: rb_num_t,
+ obj: VALUE,
+ v: VALUE,
+ ) -> bool;
+ pub fn rb_vm_set_ivar_idx(obj: VALUE, idx: u32, val: VALUE) -> VALUE;
+ pub fn rb_vm_setinstancevariable(iseq: IseqPtr, obj: VALUE, id: ID, val: VALUE, ic: IVC);
+ pub fn rb_aliased_callable_method_entry(
+ me: *const rb_callable_method_entry_t,
+ ) -> *const rb_callable_method_entry_t;
+ pub fn rb_iseq_only_optparam_p(iseq: IseqPtr) -> bool;
+ pub fn rb_iseq_only_kwparam_p(iseq: IseqPtr) -> bool;
+ pub fn rb_vm_getclassvariable(iseq: IseqPtr, cfp: CfpPtr, id: ID, ic: ICVARC) -> VALUE;
+ pub fn rb_vm_setclassvariable(
+ iseq: IseqPtr,
+ cfp: CfpPtr,
+ id: ID,
+ val: VALUE,
+ ic: ICVARC,
+ ) -> VALUE;
+ pub fn rb_vm_ic_hit_p(ic: IC, reg_ep: *const VALUE) -> bool;
+
+ #[link_name = "rb_vm_ci_argc"]
+ pub fn vm_ci_argc(ci: *const rb_callinfo) -> c_int;
+
+ #[link_name = "rb_vm_ci_mid"]
+ pub fn vm_ci_mid(ci: *const rb_callinfo) -> ID;
+
+ #[link_name = "rb_vm_ci_flag"]
+ pub fn vm_ci_flag(ci: *const rb_callinfo) -> c_uint;
+
+ #[link_name = "rb_vm_ci_kwarg"]
+ pub fn vm_ci_kwarg(ci: *const rb_callinfo) -> *const rb_callinfo_kwarg;
+
+ #[link_name = "rb_METHOD_ENTRY_VISI"]
+ pub fn METHOD_ENTRY_VISI(me: *const rb_callable_method_entry_t) -> rb_method_visibility_t;
+
+ pub fn rb_yjit_branch_stub_hit(
+ branch_ptr: *const c_void,
+ target_idx: u32,
+ ec: EcPtr,
+ ) -> *const c_void;
+
+ pub fn rb_str_bytesize(str: VALUE) -> VALUE;
+
+ #[link_name = "rb_RCLASS_ORIGIN"]
+ pub fn RCLASS_ORIGIN(v: VALUE) -> VALUE;
+}
+
+/// Helper so we can get a Rust string for insn_name()
+pub fn insn_name(opcode: usize) -> String {
+ use std::ffi::CStr;
+
+ unsafe {
+ // Look up Ruby's NULL-terminated insn name string
+ let op_name = raw_insn_name(VALUE(opcode));
+
+        // Convert the op name C string to a Rust string slice
+ let op_name = CStr::from_ptr(op_name).to_str().unwrap();
+
+ // Convert into an owned string
+ op_name.to_string()
+ }
+}
+
+#[allow(unused_variables)]
+pub fn insn_len(opcode: usize) -> u32 {
+ #[cfg(test)]
+ panic!("insn_len is a CRuby function, and we don't link against CRuby for Rust testing!");
+
+ #[cfg(not(test))]
+ unsafe {
+ raw_insn_len(VALUE(opcode)).try_into().unwrap()
+ }
+}
+
+/// Opaque iseq type for opaque iseq pointers from vm_core.h
+/// See: <https://doc.rust-lang.org/nomicon/ffi.html#representing-opaque-structs>
+#[repr(C)]
+pub struct rb_iseq_t {
+ _data: [u8; 0],
+ _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
+}
+
+/// An object handle similar to VALUE in the C code. Our methods assume
+/// that this is a handle. Sometimes the C code briefly uses VALUE as
+/// an unsigned integer type and doesn't necessarily store valid handles, but
+/// thankfully those cases are rare and don't cross the FFI boundary.
+#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
+#[repr(transparent)] // same size and alignment as simply `usize`
+pub struct VALUE(pub usize);
+
+/// Pointer to an ISEQ
+pub type IseqPtr = *const rb_iseq_t;
+
+/// Opaque execution-context type from vm_core.h
+#[repr(C)]
+pub struct rb_execution_context_struct {
+ _data: [u8; 0],
+ _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
+}
+/// Alias for rb_execution_context_struct used by CRuby sometimes
+pub type rb_execution_context_t = rb_execution_context_struct;
+
+/// Pointer to an execution context (rb_execution_context_struct)
+pub type EcPtr = *const rb_execution_context_struct;
+
+// From method.h
+#[repr(C)]
+pub struct rb_method_definition_t {
+ _data: [u8; 0],
+ _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
+}
+type rb_method_definition_struct = rb_method_definition_t;
+
+/// Opaque cfunc type from method.h
+#[repr(C)]
+pub struct rb_method_cfunc_t {
+ _data: [u8; 0],
+ _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
+}
+
+/// Opaque FILE type from the C standard library
+#[repr(C)]
+pub struct FILE {
+ _data: [u8; 0],
+ _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
+}
+
+/// Opaque call-cache type from vm_callinfo.h
+#[repr(C)]
+pub struct rb_callcache {
+ _data: [u8; 0],
+ _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
+}
+
+/// Opaque call-info type from vm_callinfo.h
+#[repr(C)]
+pub struct rb_callinfo_kwarg {
+ _data: [u8; 0],
+ _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
+}
+
+/// Opaque control_frame (CFP) struct from vm_core.h
+#[repr(C)]
+pub struct rb_control_frame_struct {
+ _data: [u8; 0],
+ _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
+}
+
+/// Pointer to a control frame pointer (CFP)
+pub type CfpPtr = *mut rb_control_frame_struct;
+
+/// Opaque struct from vm_core.h
+#[repr(C)]
+pub struct rb_cref_t {
+ _data: [u8; 0],
+ _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
+}
+
+impl VALUE {
+ /// Dump info about the value to the console similarly to rp(VALUE)
+ pub fn dump_info(self) {
+ unsafe { rb_obj_info_dump(self) }
+ }
+
+ /// Return whether the value is truthy or falsy in Ruby -- only nil and false are falsy.
+ pub fn test(self) -> bool {
+ let VALUE(cval) = self;
+ let VALUE(qnilval) = Qnil;
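+        // Qfalse is 0 and Qnil is 8 (Qfalse with one extra bit set), so clearing
+        // Qnil's bit maps both falsy values to 0 and every other value to non-zero.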
+ (cval & !qnilval) != 0
+ }
+
+ /// Return true if the number is an immediate integer, flonum or static symbol
+ fn immediate_p(self) -> bool {
+ let VALUE(cval) = self;
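+        // Any of the low three tag bits set (see RUBY_IMMEDIATE_MASK) marks an immediate value.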
+ (cval & 7) != 0
+ }
+
+ /// Return true if the value is a Ruby immediate integer, flonum, static symbol, nil or false
+ pub fn special_const_p(self) -> bool {
+ self.immediate_p() || !self.test()
+ }
+
+ /// Return true if the value is a Ruby Fixnum (immediate-size integer)
+ pub fn fixnum_p(self) -> bool {
+ let VALUE(cval) = self;
+ (cval & 1) == 1
+ }
+
+ /// Return true if the value is an immediate Ruby floating-point number (flonum)
+ pub fn flonum_p(self) -> bool {
+ let VALUE(cval) = self;
+ (cval & 3) == 2
+ }
+
+ /// Return true for a static (non-heap) Ruby symbol
+ pub fn static_sym_p(self) -> bool {
+ let VALUE(cval) = self;
+ (cval & 0xff) == RUBY_SYMBOL_FLAG
+ }
+
+ /// Returns true or false depending on whether the value is nil
+ pub fn nil_p(self) -> bool {
+ self == Qnil
+ }
+
+ /// Read the flags bits from the RBasic object, then return a Ruby type enum (e.g. RUBY_T_ARRAY)
+ pub fn builtin_type(self) -> ruby_value_type {
+ assert!(!self.special_const_p());
+
+ let VALUE(cval) = self;
+ let rbasic_ptr = cval as *const RBasic;
+ let flags_bits: usize = unsafe { (*rbasic_ptr).flags }.as_usize();
+ (flags_bits & (RUBY_T_MASK as usize)) as ruby_value_type
+ }
+
+ pub fn class_of(self) -> VALUE {
+ unsafe { CLASS_OF(self) }
+ }
+
+ pub fn as_isize(self) -> isize {
+ let VALUE(is) = self;
+ is as isize
+ }
+
+ pub fn as_i32(self) -> i32 {
+ self.as_i64().try_into().unwrap()
+ }
+
+ pub fn as_u32(self) -> u32 {
+ let VALUE(i) = self;
+ i.try_into().unwrap()
+ }
+
+ pub fn as_i64(self) -> i64 {
+ let VALUE(i) = self;
+ i as i64
+ }
+
+ pub fn as_u64(self) -> u64 {
+ let VALUE(i) = self;
+ i.try_into().unwrap()
+ }
+
+ pub fn as_usize(self) -> usize {
+ let VALUE(us) = self;
+ us as usize
+ }
+
+ pub fn as_ptr<T>(self) -> *const T {
+ let VALUE(us) = self;
+ us as *const T
+ }
+
+ pub fn as_mut_ptr<T>(self) -> *mut T {
+ let VALUE(us) = self;
+ us as *mut T
+ }
+
+    /// For working with opaque pointers while encoding the null check.
+    /// Similar to [std::ptr::NonNull], but for `*const T`. `NonNull<T>`
+    /// is for `*mut T` while our C functions are set up to use `*const T`.
+ /// Casting from `NonNull<T>` to `*const T` is too noisy.
+ pub fn as_optional_ptr<T>(self) -> Option<*const T> {
+ let ptr: *const T = self.as_ptr();
+
+ if ptr.is_null() {
+ None
+ } else {
+ Some(ptr)
+ }
+ }
+
+ /// Assert that `self` is an iseq in debug builds
+ pub fn as_iseq(self) -> IseqPtr {
+ let ptr: IseqPtr = self.as_ptr();
+
+ #[cfg(debug_assertions)]
+ if !ptr.is_null() {
+ unsafe { rb_assert_iseq_handle(self) }
+ }
+
+ ptr
+ }
+
+ /// Assert that `self` is a method entry in debug builds
+ pub fn as_cme(self) -> *const rb_callable_method_entry_t {
+ let ptr: *const rb_callable_method_entry_t = self.as_ptr();
+
+ #[cfg(debug_assertions)]
+ if !ptr.is_null() {
+ unsafe { rb_assert_cme_handle(self) }
+ }
+
+ ptr
+ }
+}
+
+impl VALUE {
+ pub fn fixnum_from_usize(item: usize) -> Self {
+ assert!(item <= (RUBY_FIXNUM_MAX as usize)); // An unsigned will always be greater than RUBY_FIXNUM_MIN
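+        // Fixnums are encoded as (2 * n) + 1, i.e. the value shifted left by one
+        // with the Fixnum tag set in the lowest bit.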
+ let k: usize = item.wrapping_add(item.wrapping_add(1));
+ VALUE(k)
+ }
+}
+
+impl From<IseqPtr> for VALUE {
+ /// For `.into()` convenience
+ fn from(iseq: IseqPtr) -> Self {
+ VALUE(iseq as usize)
+ }
+}
+
+impl From<*const rb_callable_method_entry_t> for VALUE {
+ /// For `.into()` convenience
+ fn from(cme: *const rb_callable_method_entry_t) -> Self {
+ VALUE(cme as usize)
+ }
+}
+
+impl From<VALUE> for u64 {
+ fn from(value: VALUE) -> Self {
+ let VALUE(uimm) = value;
+ uimm as u64
+ }
+}
+
+impl From<VALUE> for i64 {
+ fn from(value: VALUE) -> Self {
+ let VALUE(uimm) = value;
+ assert!(uimm <= (i64::MAX as usize));
+ uimm as i64
+ }
+}
+
+impl From<VALUE> for i32 {
+ fn from(value: VALUE) -> Self {
+ let VALUE(uimm) = value;
+ assert!(uimm <= (i32::MAX as usize));
+ uimm as i32
+ }
+}
+
+/// Produce a Ruby string from a Rust string slice
+pub fn rust_str_to_ruby(str: &str) -> VALUE {
+ unsafe { rb_utf8_str_new(str.as_ptr() as *const i8, str.len() as i64) }
+}
+
+/// Produce a Ruby symbol from a Rust string slice
+pub fn rust_str_to_sym(str: &str) -> VALUE {
+ let c_str = CString::new(str).unwrap();
+ let c_ptr: *const c_char = c_str.as_ptr();
+
+ unsafe { rb_id2sym(rb_intern(c_ptr)) }
+}
+
+/// A location in Rust code for integrating with debugging facilities defined in C.
+/// Use the [src_loc!] macro to create an instance.
+pub struct SourceLocation {
+ pub file: CString,
+ pub line: c_int,
+}
+
+/// Make a [SourceLocation] at the current spot.
+macro_rules! src_loc {
+ () => {
+ // NOTE(alan): `CString::new` allocates so we might want to limit this to debug builds.
+ $crate::cruby::SourceLocation {
+ file: std::ffi::CString::new(file!()).unwrap(), // ASCII source file paths
+ line: line!().try_into().unwrap(), // not that many lines
+ }
+ };
+}
+
+pub(crate) use src_loc;
+
+/// Run GC write barrier. Required after making a new edge in the object reference
+/// graph from `old` to `young`.
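+///
+/// For example (hypothetical usage), after storing `young` into a slot owned by
+/// `old`: `obj_written!(old, young);`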
+macro_rules! obj_written {
+ ($old: expr, $young: expr) => {
+ let (old, young): (VALUE, VALUE) = ($old, $young);
+ let src_loc = $crate::cruby::src_loc!();
+ unsafe { rb_yjit_obj_written(old, young, src_loc.file.as_ptr(), src_loc.line) };
+ };
+}
+pub(crate) use obj_written;
+
+/// Acquire the VM lock, make sure all other Ruby threads are asleep then run
+/// some code while holding the lock. Returns whatever `func` returns.
+/// Use with [src_loc!].
+///
+/// Required for code patching in the presence of ractors.
+pub fn with_vm_lock<F, R>(loc: SourceLocation, func: F) -> R
+where
+ F: FnOnce() -> R + UnwindSafe,
+{
+ let file = loc.file.as_ptr();
+ let line = loc.line;
+ let mut recursive_lock_level: c_uint = 0;
+
+ unsafe { rb_yjit_vm_lock_then_barrier(&mut recursive_lock_level, file, line) };
+
+ let ret = match catch_unwind(func) {
+ Ok(result) => result,
+ Err(_) => {
+ // Theoretically we can recover from some of these panics,
+ // but it's too late if the unwind reaches here.
+ use std::{io, process, str};
+
+ let _ = catch_unwind(|| {
+ // IO functions can panic too.
+ eprintln!(
+ "YJIT panicked while holding VM lock acquired at {}:{}. Aborting...",
+ str::from_utf8(loc.file.as_bytes()).unwrap_or("<not utf8>"),
+ line,
+ );
+ });
+ process::abort();
+ }
+ };
+
+ unsafe { rb_yjit_vm_unlock(&mut recursive_lock_level, file, line) };
+
+ ret
+}
+
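+// A minimal usage sketch for with_vm_lock (hypothetical caller, not a definitive
+// pattern): wrap code patching in the closure so it runs while all other Ruby
+// threads are stopped.
+//
+//     let result = with_vm_lock(src_loc!(), || {
+//         // ... patch code here ...
+//         42
+//     });
+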
+// Non-idiomatic capitalization for consistency with CRuby code
+#[allow(non_upper_case_globals)]
+pub const Qfalse: VALUE = VALUE(0);
+#[allow(non_upper_case_globals)]
+pub const Qnil: VALUE = VALUE(8);
+#[allow(non_upper_case_globals)]
+pub const Qtrue: VALUE = VALUE(20);
+#[allow(non_upper_case_globals)]
+pub const Qundef: VALUE = VALUE(52);
+
+pub const RUBY_SYMBOL_FLAG: usize = 0x0c;
+
+pub const RUBY_LONG_MIN: isize = std::os::raw::c_long::MIN as isize;
+pub const RUBY_LONG_MAX: isize = std::os::raw::c_long::MAX as isize;
+
+pub const RUBY_FIXNUM_MIN: isize = RUBY_LONG_MIN / 2;
+pub const RUBY_FIXNUM_MAX: isize = RUBY_LONG_MAX / 2;
+pub const RUBY_FIXNUM_FLAG: usize = 0x1;
+
+pub const RUBY_FLONUM_FLAG: usize = 0x2;
+pub const RUBY_FLONUM_MASK: usize = 0x3;
+
+pub const RUBY_IMMEDIATE_MASK: usize = 0x7;
+
+pub const RUBY_SPECIAL_SHIFT: usize = 8;
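+
+// Tag scheme summary on 64-bit builds with flonums (matching the predicates on
+// VALUE above): ...xxx1 is a Fixnum, ...xx10 is a flonum, a low byte of 0x0c is a
+// static symbol, and 0, 8, 20, 52 are false, nil, true, and undef respectively.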
+
+// Constants from vm_core.h
+pub const VM_SPECIAL_OBJECT_VMCORE: usize = 0x1;
+pub const VM_ENV_DATA_INDEX_SPECVAL: isize = -1;
+pub const VM_ENV_DATA_INDEX_FLAGS: isize = 0;
+pub const VM_ENV_DATA_SIZE: usize = 3;
+
+// From vm_callinfo.h
+pub const VM_CALL_ARGS_SPLAT: u32 = 1 << VM_CALL_ARGS_SPLAT_bit;
+pub const VM_CALL_ARGS_BLOCKARG: u32 = 1 << VM_CALL_ARGS_BLOCKARG_bit;
+pub const VM_CALL_FCALL: u32 = 1 << VM_CALL_FCALL_bit;
+pub const VM_CALL_KWARG: u32 = 1 << VM_CALL_KWARG_bit;
+pub const VM_CALL_KW_SPLAT: u32 = 1 << VM_CALL_KW_SPLAT_bit;
+pub const VM_CALL_TAILCALL: u32 = 1 << VM_CALL_TAILCALL_bit;
+
+pub const SIZEOF_VALUE: usize = 8;
+pub const SIZEOF_VALUE_I32: i32 = SIZEOF_VALUE as i32;
+
+pub const RUBY_FL_SINGLETON: usize = RUBY_FL_USER_0;
+
+pub const ROBJECT_EMBED: usize = RUBY_FL_USER_1;
+pub const ROBJECT_EMBED_LEN_MAX: usize = 3; // This is a complex calculation in ruby/internal/core/robject.h
+
+pub const RMODULE_IS_REFINEMENT: usize = RUBY_FL_USER_3;
+
+// Constants from include/ruby/internal/fl_type.h
+pub const RUBY_FL_USHIFT: usize = 12;
+pub const RUBY_FL_USER_0: usize = 1 << (RUBY_FL_USHIFT + 0);
+pub const RUBY_FL_USER_1: usize = 1 << (RUBY_FL_USHIFT + 1);
+pub const RUBY_FL_USER_2: usize = 1 << (RUBY_FL_USHIFT + 2);
+pub const RUBY_FL_USER_3: usize = 1 << (RUBY_FL_USHIFT + 3);
+pub const RUBY_FL_USER_4: usize = 1 << (RUBY_FL_USHIFT + 4);
+pub const RUBY_FL_USER_5: usize = 1 << (RUBY_FL_USHIFT + 5);
+pub const RUBY_FL_USER_6: usize = 1 << (RUBY_FL_USHIFT + 6);
+pub const RUBY_FL_USER_7: usize = 1 << (RUBY_FL_USHIFT + 7);
+pub const RUBY_FL_USER_8: usize = 1 << (RUBY_FL_USHIFT + 8);
+pub const RUBY_FL_USER_9: usize = 1 << (RUBY_FL_USHIFT + 9);
+pub const RUBY_FL_USER_10: usize = 1 << (RUBY_FL_USHIFT + 10);
+pub const RUBY_FL_USER_11: usize = 1 << (RUBY_FL_USHIFT + 11);
+pub const RUBY_FL_USER_12: usize = 1 << (RUBY_FL_USHIFT + 12);
+pub const RUBY_FL_USER_13: usize = 1 << (RUBY_FL_USHIFT + 13);
+pub const RUBY_FL_USER_14: usize = 1 << (RUBY_FL_USHIFT + 14);
+pub const RUBY_FL_USER_15: usize = 1 << (RUBY_FL_USHIFT + 15);
+pub const RUBY_FL_USER_16: usize = 1 << (RUBY_FL_USHIFT + 16);
+pub const RUBY_FL_USER_17: usize = 1 << (RUBY_FL_USHIFT + 17);
+pub const RUBY_FL_USER_18: usize = 1 << (RUBY_FL_USHIFT + 18);
+pub const RUBY_FL_USER_19: usize = 1 << (RUBY_FL_USHIFT + 19);
+
+// Constants from include/ruby/internal/core/rarray.h
+pub const RARRAY_EMBED_FLAG: usize = RUBY_FL_USER_1;
+pub const RARRAY_EMBED_LEN_SHIFT: usize = RUBY_FL_USHIFT + 3;
+pub const RARRAY_EMBED_LEN_MASK: usize = RUBY_FL_USER_3 | RUBY_FL_USER_4;
+
+// From internal/struct.h
+pub const RSTRUCT_EMBED_LEN_MASK: usize = RUBY_FL_USER_2 | RUBY_FL_USER_1;
+
+// From iseq.h
+pub const ISEQ_TRANSLATED: usize = RUBY_FL_USER_7;
+
+// We'll need to encode a lot of Ruby struct/field offsets as constants unless we want to
+// redeclare all the Ruby C structs and write our own offsetof macro. For now, we use constants.
+pub const RUBY_OFFSET_RBASIC_FLAGS: i32 = 0; // struct RBasic, field "flags"
+pub const RUBY_OFFSET_RBASIC_KLASS: i32 = 8; // struct RBasic, field "klass"
+pub const RUBY_OFFSET_RARRAY_AS_HEAP_LEN: i32 = 16; // struct RArray, subfield "as.heap.len"
+pub const RUBY_OFFSET_RARRAY_AS_HEAP_PTR: i32 = 32; // struct RArray, subfield "as.heap.ptr"
+pub const RUBY_OFFSET_RARRAY_AS_ARY: i32 = 16; // struct RArray, subfield "as.ary"
+
+pub const RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR: i32 = 24; // struct RStruct, subfield "as.heap.ptr"
+pub const RUBY_OFFSET_RSTRUCT_AS_ARY: i32 = 16; // struct RStruct, subfield "as.ary"
+
+pub const RUBY_OFFSET_ROBJECT_AS_ARY: i32 = 16; // struct RObject, subfield "as.ary"
+pub const RUBY_OFFSET_ROBJECT_AS_HEAP_NUMIV: i32 = 16; // struct RObject, subfield "as.heap.numiv"
+pub const RUBY_OFFSET_ROBJECT_AS_HEAP_IVPTR: i32 = 24; // struct RObject, subfield "as.heap.ivptr"
+
+// Constants from rb_control_frame_t vm_core.h
+pub const RUBY_OFFSET_CFP_PC: i32 = 0;
+pub const RUBY_OFFSET_CFP_SP: i32 = 8;
+pub const RUBY_OFFSET_CFP_ISEQ: i32 = 16;
+pub const RUBY_OFFSET_CFP_SELF: i32 = 24;
+pub const RUBY_OFFSET_CFP_EP: i32 = 32;
+pub const RUBY_OFFSET_CFP_BLOCK_CODE: i32 = 40;
+pub const RUBY_OFFSET_CFP_BP: i32 = 48; // field __bp__
+pub const RUBY_OFFSET_CFP_JIT_RETURN: i32 = 56;
+pub const RUBY_SIZEOF_CONTROL_FRAME: usize = 64;
+
+// Constants from rb_execution_context_t vm_core.h
+pub const RUBY_OFFSET_EC_CFP: i32 = 16;
+pub const RUBY_OFFSET_EC_INTERRUPT_FLAG: i32 = 32; // rb_atomic_t (u32)
+pub const RUBY_OFFSET_EC_INTERRUPT_MASK: i32 = 36; // rb_atomic_t (u32)
+pub const RUBY_OFFSET_EC_THREAD_PTR: i32 = 48;
+
+// Constants from rb_thread_t in vm_core.h
+pub const RUBY_OFFSET_THREAD_SELF: i32 = 16;
+
+// Constants from iseq_inline_constant_cache (IC) and iseq_inline_constant_cache_entry (ICE) in vm_core.h
+pub const RUBY_OFFSET_IC_ENTRY: i32 = 0;
+pub const RUBY_OFFSET_ICE_VALUE: i32 = 8;
+
+// TODO: need to dynamically autogenerate constants for all the YARV opcodes from insns.def
+// TODO: typing of these adds unnecessary casting
+pub const OP_NOP: usize = 0;
+pub const OP_GETLOCAL: usize = 1;
+pub const OP_SETLOCAL: usize = 2;
+pub const OP_GETBLOCKPARAM: usize = 3;
+pub const OP_SETBLOCKPARAM: usize = 4;
+pub const OP_GETBLOCKPARAMPROXY: usize = 5;
+pub const OP_GETSPECIAL: usize = 6;
+pub const OP_SETSPECIAL: usize = 7;
+pub const OP_GETINSTANCEVARIABLE: usize = 8;
+pub const OP_SETINSTANCEVARIABLE: usize = 9;
+pub const OP_GETCLASSVARIABLE: usize = 10;
+pub const OP_SETCLASSVARIABLE: usize = 11;
+pub const OP_GETCONSTANT: usize = 12;
+pub const OP_SETCONSTANT: usize = 13;
+pub const OP_GETGLOBAL: usize = 14;
+pub const OP_SETGLOBAL: usize = 15;
+pub const OP_PUTNIL: usize = 16;
+pub const OP_PUTSELF: usize = 17;
+pub const OP_PUTOBJECT: usize = 18;
+pub const OP_PUTSPECIALOBJECT: usize = 19;
+pub const OP_PUTSTRING: usize = 20;
+pub const OP_CONCATSTRINGS: usize = 21;
+pub const OP_ANYTOSTRING: usize = 22;
+pub const OP_TOREGEXP: usize = 23;
+pub const OP_INTERN: usize = 24;
+pub const OP_NEWARRAY: usize = 25;
+pub const OP_NEWARRAYKWSPLAT: usize = 26;
+pub const OP_DUPARRAY: usize = 27;
+pub const OP_DUPHASH: usize = 28;
+pub const OP_EXPANDARRAY: usize = 29;
+pub const OP_CONCATARRAY: usize = 30;
+pub const OP_SPLATARRAY: usize = 31;
+pub const OP_NEWHASH: usize = 32;
+pub const OP_NEWRANGE: usize = 33;
+pub const OP_POP: usize = 34;
+pub const OP_DUP: usize = 35;
+pub const OP_DUPN: usize = 36;
+pub const OP_SWAP: usize = 37;
+pub const OP_TOPN: usize = 38;
+pub const OP_SETN: usize = 39;
+pub const OP_ADJUSTSTACK: usize = 40;
+pub const OP_DEFINED: usize = 41;
+pub const OP_CHECKMATCH: usize = 42;
+pub const OP_CHECKKEYWORD: usize = 43;
+pub const OP_CHECKTYPE: usize = 44;
+pub const OP_DEFINECLASS: usize = 45;
+pub const OP_DEFINEMETHOD: usize = 46;
+pub const OP_DEFINESMETHOD: usize = 47;
+pub const OP_SEND: usize = 48;
+pub const OP_OPT_SEND_WITHOUT_BLOCK: usize = 49;
+pub const OP_OBJTOSTRING: usize = 50;
+pub const OP_OPT_STR_FREEZE: usize = 51;
+pub const OP_OPT_NIL_P: usize = 52;
+pub const OP_OPT_STR_UMINUS: usize = 53;
+pub const OP_OPT_NEWARRAY_MAX: usize = 54;
+pub const OP_OPT_NEWARRAY_MIN: usize = 55;
+pub const OP_INVOKESUPER: usize = 56;
+pub const OP_INVOKEBLOCK: usize = 57;
+pub const OP_LEAVE: usize = 58;
+pub const OP_THROW: usize = 59;
+pub const OP_JUMP: usize = 60;
+pub const OP_BRANCHIF: usize = 61;
+pub const OP_BRANCHUNLESS: usize = 62;
+pub const OP_BRANCHNIL: usize = 63;
+pub const OP_OPT_GETINLINECACHE: usize = 64;
+pub const OP_OPT_SETINLINECACHE: usize = 65;
+pub const OP_ONCE: usize = 66;
+pub const OP_OPT_CASE_DISPATCH: usize = 67;
+pub const OP_OPT_PLUS: usize = 68;
+pub const OP_OPT_MINUS: usize = 69;
+pub const OP_OPT_MULT: usize = 70;
+pub const OP_OPT_DIV: usize = 71;
+pub const OP_OPT_MOD: usize = 72;
+pub const OP_OPT_EQ: usize = 73;
+pub const OP_OPT_NEQ: usize = 74;
+pub const OP_OPT_LT: usize = 75;
+pub const OP_OPT_LE: usize = 76;
+pub const OP_OPT_GT: usize = 77;
+pub const OP_OPT_GE: usize = 78;
+pub const OP_OPT_LTLT: usize = 79;
+pub const OP_OPT_AND: usize = 80;
+pub const OP_OPT_OR: usize = 81;
+pub const OP_OPT_AREF: usize = 82;
+pub const OP_OPT_ASET: usize = 83;
+pub const OP_OPT_ASET_WITH: usize = 84;
+pub const OP_OPT_AREF_WITH: usize = 85;
+pub const OP_OPT_LENGTH: usize = 86;
+pub const OP_OPT_SIZE: usize = 87;
+pub const OP_OPT_EMPTY_P: usize = 88;
+pub const OP_OPT_SUCC: usize = 89;
+pub const OP_OPT_NOT: usize = 90;
+pub const OP_OPT_REGEXPMATCH2: usize = 91;
+pub const OP_INVOKEBUILTIN: usize = 92;
+pub const OP_OPT_INVOKEBUILTIN_DELEGATE: usize = 93;
+pub const OP_OPT_INVOKEBUILTIN_DELEGATE_LEAVE: usize = 94;
+pub const OP_GETLOCAL_WC_0: usize = 95;
+pub const OP_GETLOCAL_WC_1: usize = 96;
+pub const OP_SETLOCAL_WC_0: usize = 97;
+pub const OP_SETLOCAL_WC_1: usize = 98;
+pub const OP_PUTOBJECT_INT2FIX_0_: usize = 99;
+pub const OP_PUTOBJECT_INT2FIX_1_: usize = 100;
+
+pub const VM_INSTRUCTION_SIZE: usize = 202;
diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs
new file mode 100644
index 0000000000..2be42f5c63
--- /dev/null
+++ b/yjit/src/cruby_bindings.inc.rs
@@ -0,0 +1,783 @@
+/* automatically generated by rust-bindgen 0.59.2 */
+
+pub const INTEGER_REDEFINED_OP_FLAG: u32 = 1;
+pub const FLOAT_REDEFINED_OP_FLAG: u32 = 2;
+pub const STRING_REDEFINED_OP_FLAG: u32 = 4;
+pub const ARRAY_REDEFINED_OP_FLAG: u32 = 8;
+pub const HASH_REDEFINED_OP_FLAG: u32 = 16;
+pub const SYMBOL_REDEFINED_OP_FLAG: u32 = 64;
+pub const TIME_REDEFINED_OP_FLAG: u32 = 128;
+pub const REGEXP_REDEFINED_OP_FLAG: u32 = 256;
+pub const NIL_REDEFINED_OP_FLAG: u32 = 512;
+pub const TRUE_REDEFINED_OP_FLAG: u32 = 1024;
+pub const FALSE_REDEFINED_OP_FLAG: u32 = 2048;
+pub const PROC_REDEFINED_OP_FLAG: u32 = 4096;
+pub const VM_ENV_DATA_INDEX_ME_CREF: i32 = -2;
+pub const VM_BLOCK_HANDLER_NONE: u32 = 0;
+pub type ID = ::std::os::raw::c_ulong;
+extern "C" {
+ pub fn rb_singleton_class(obj: VALUE) -> VALUE;
+}
+pub type rb_alloc_func_t = ::std::option::Option<unsafe extern "C" fn(klass: VALUE) -> VALUE>;
+extern "C" {
+ pub fn rb_get_alloc_func(klass: VALUE) -> rb_alloc_func_t;
+}
+#[repr(C)]
+pub struct RBasic {
+ pub flags: VALUE,
+ pub klass: VALUE,
+}
+pub const RUBY_T_NONE: ruby_value_type = 0;
+pub const RUBY_T_OBJECT: ruby_value_type = 1;
+pub const RUBY_T_CLASS: ruby_value_type = 2;
+pub const RUBY_T_MODULE: ruby_value_type = 3;
+pub const RUBY_T_FLOAT: ruby_value_type = 4;
+pub const RUBY_T_STRING: ruby_value_type = 5;
+pub const RUBY_T_REGEXP: ruby_value_type = 6;
+pub const RUBY_T_ARRAY: ruby_value_type = 7;
+pub const RUBY_T_HASH: ruby_value_type = 8;
+pub const RUBY_T_STRUCT: ruby_value_type = 9;
+pub const RUBY_T_BIGNUM: ruby_value_type = 10;
+pub const RUBY_T_FILE: ruby_value_type = 11;
+pub const RUBY_T_DATA: ruby_value_type = 12;
+pub const RUBY_T_MATCH: ruby_value_type = 13;
+pub const RUBY_T_COMPLEX: ruby_value_type = 14;
+pub const RUBY_T_RATIONAL: ruby_value_type = 15;
+pub const RUBY_T_NIL: ruby_value_type = 17;
+pub const RUBY_T_TRUE: ruby_value_type = 18;
+pub const RUBY_T_FALSE: ruby_value_type = 19;
+pub const RUBY_T_SYMBOL: ruby_value_type = 20;
+pub const RUBY_T_FIXNUM: ruby_value_type = 21;
+pub const RUBY_T_UNDEF: ruby_value_type = 22;
+pub const RUBY_T_IMEMO: ruby_value_type = 26;
+pub const RUBY_T_NODE: ruby_value_type = 27;
+pub const RUBY_T_ICLASS: ruby_value_type = 28;
+pub const RUBY_T_ZOMBIE: ruby_value_type = 29;
+pub const RUBY_T_MOVED: ruby_value_type = 30;
+pub const RUBY_T_MASK: ruby_value_type = 31;
+pub type ruby_value_type = u32;
+pub type st_data_t = ::std::os::raw::c_ulong;
+pub type st_index_t = st_data_t;
+extern "C" {
+ pub fn rb_class_get_superclass(klass: VALUE) -> VALUE;
+}
+extern "C" {
+ pub static mut rb_mKernel: VALUE;
+}
+extern "C" {
+ pub static mut rb_cBasicObject: VALUE;
+}
+extern "C" {
+ pub static mut rb_cArray: VALUE;
+}
+extern "C" {
+ pub static mut rb_cFalseClass: VALUE;
+}
+extern "C" {
+ pub static mut rb_cFloat: VALUE;
+}
+extern "C" {
+ pub static mut rb_cHash: VALUE;
+}
+extern "C" {
+ pub static mut rb_cInteger: VALUE;
+}
+extern "C" {
+ pub static mut rb_cModule: VALUE;
+}
+extern "C" {
+ pub static mut rb_cNilClass: VALUE;
+}
+extern "C" {
+ pub static mut rb_cString: VALUE;
+}
+extern "C" {
+ pub static mut rb_cSymbol: VALUE;
+}
+extern "C" {
+ pub static mut rb_cThread: VALUE;
+}
+extern "C" {
+ pub static mut rb_cTrueClass: VALUE;
+}
+extern "C" {
+ pub fn rb_ary_new_capa(capa: ::std::os::raw::c_long) -> VALUE;
+}
+extern "C" {
+ pub fn rb_ary_store(ary: VALUE, key: ::std::os::raw::c_long, val: VALUE);
+}
+extern "C" {
+ pub fn rb_ary_resurrect(ary: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_ary_clear(ary: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_hash_new() -> VALUE;
+}
+extern "C" {
+ pub fn rb_hash_aref(hash: VALUE, key: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_hash_aset(hash: VALUE, key: VALUE, val: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_hash_bulk_insert(argc: ::std::os::raw::c_long, argv: *const VALUE, hash: VALUE);
+}
+extern "C" {
+ pub fn rb_sym2id(obj: VALUE) -> ID;
+}
+extern "C" {
+ pub fn rb_id2sym(id: ID) -> VALUE;
+}
+extern "C" {
+ pub fn rb_intern(name: *const ::std::os::raw::c_char) -> ID;
+}
+extern "C" {
+ pub fn rb_gc_mark(obj: VALUE);
+}
+extern "C" {
+ pub fn rb_gc_mark_movable(obj: VALUE);
+}
+extern "C" {
+ pub fn rb_gc_location(obj: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_obj_is_kind_of(obj: VALUE, klass: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_backref_get() -> VALUE;
+}
+extern "C" {
+ pub fn rb_range_new(beg: VALUE, end: VALUE, excl: ::std::os::raw::c_int) -> VALUE;
+}
+extern "C" {
+ pub fn rb_reg_nth_match(n: ::std::os::raw::c_int, md: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_reg_last_match(md: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_reg_match_pre(md: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_reg_match_post(md: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_reg_match_last(md: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_utf8_str_new(
+ ptr: *const ::std::os::raw::c_char,
+ len: ::std::os::raw::c_long,
+ ) -> VALUE;
+}
+extern "C" {
+ pub fn rb_str_intern(str_: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_ivar_get(obj: VALUE, name: ID) -> VALUE;
+}
+extern "C" {
+ pub fn rb_attr_get(obj: VALUE, name: ID) -> VALUE;
+}
+extern "C" {
+ pub fn rb_obj_info_dump(obj: VALUE);
+}
+extern "C" {
+ pub fn rb_reg_new_ary(ary: VALUE, options: ::std::os::raw::c_int) -> VALUE;
+}
+pub const idDot2: ruby_method_ids = 128;
+pub const idDot3: ruby_method_ids = 129;
+pub const idUPlus: ruby_method_ids = 132;
+pub const idUMinus: ruby_method_ids = 133;
+pub const idPow: ruby_method_ids = 134;
+pub const idCmp: ruby_method_ids = 135;
+pub const idPLUS: ruby_method_ids = 43;
+pub const idMINUS: ruby_method_ids = 45;
+pub const idMULT: ruby_method_ids = 42;
+pub const idDIV: ruby_method_ids = 47;
+pub const idMOD: ruby_method_ids = 37;
+pub const idLTLT: ruby_method_ids = 136;
+pub const idGTGT: ruby_method_ids = 137;
+pub const idLT: ruby_method_ids = 60;
+pub const idLE: ruby_method_ids = 138;
+pub const idGT: ruby_method_ids = 62;
+pub const idGE: ruby_method_ids = 139;
+pub const idEq: ruby_method_ids = 140;
+pub const idEqq: ruby_method_ids = 141;
+pub const idNeq: ruby_method_ids = 142;
+pub const idNot: ruby_method_ids = 33;
+pub const idAnd: ruby_method_ids = 38;
+pub const idOr: ruby_method_ids = 124;
+pub const idBackquote: ruby_method_ids = 96;
+pub const idEqTilde: ruby_method_ids = 143;
+pub const idNeqTilde: ruby_method_ids = 144;
+pub const idAREF: ruby_method_ids = 145;
+pub const idASET: ruby_method_ids = 146;
+pub const idCOLON2: ruby_method_ids = 147;
+pub const idANDOP: ruby_method_ids = 148;
+pub const idOROP: ruby_method_ids = 149;
+pub const idANDDOT: ruby_method_ids = 150;
+pub const tPRESERVED_ID_BEGIN: ruby_method_ids = 150;
+pub const idNilP: ruby_method_ids = 151;
+pub const idNULL: ruby_method_ids = 152;
+pub const idEmptyP: ruby_method_ids = 153;
+pub const idEqlP: ruby_method_ids = 154;
+pub const idRespond_to: ruby_method_ids = 155;
+pub const idRespond_to_missing: ruby_method_ids = 156;
+pub const idIFUNC: ruby_method_ids = 157;
+pub const idCFUNC: ruby_method_ids = 158;
+pub const id_core_set_method_alias: ruby_method_ids = 159;
+pub const id_core_set_variable_alias: ruby_method_ids = 160;
+pub const id_core_undef_method: ruby_method_ids = 161;
+pub const id_core_define_method: ruby_method_ids = 162;
+pub const id_core_define_singleton_method: ruby_method_ids = 163;
+pub const id_core_set_postexe: ruby_method_ids = 164;
+pub const id_core_hash_merge_ptr: ruby_method_ids = 165;
+pub const id_core_hash_merge_kwd: ruby_method_ids = 166;
+pub const id_core_raise: ruby_method_ids = 167;
+pub const id_core_sprintf: ruby_method_ids = 168;
+pub const id_debug_created_info: ruby_method_ids = 169;
+pub const tPRESERVED_ID_END: ruby_method_ids = 170;
+pub const tTOKEN_LOCAL_BEGIN: ruby_method_ids = 169;
+pub const tMax: ruby_method_ids = 170;
+pub const tMin: ruby_method_ids = 171;
+pub const tFreeze: ruby_method_ids = 172;
+pub const tInspect: ruby_method_ids = 173;
+pub const tIntern: ruby_method_ids = 174;
+pub const tObject_id: ruby_method_ids = 175;
+pub const tConst_added: ruby_method_ids = 176;
+pub const tConst_missing: ruby_method_ids = 177;
+pub const tMethodMissing: ruby_method_ids = 178;
+pub const tMethod_added: ruby_method_ids = 179;
+pub const tSingleton_method_added: ruby_method_ids = 180;
+pub const tMethod_removed: ruby_method_ids = 181;
+pub const tSingleton_method_removed: ruby_method_ids = 182;
+pub const tMethod_undefined: ruby_method_ids = 183;
+pub const tSingleton_method_undefined: ruby_method_ids = 184;
+pub const tLength: ruby_method_ids = 185;
+pub const tSize: ruby_method_ids = 186;
+pub const tGets: ruby_method_ids = 187;
+pub const tSucc: ruby_method_ids = 188;
+pub const tEach: ruby_method_ids = 189;
+pub const tProc: ruby_method_ids = 190;
+pub const tLambda: ruby_method_ids = 191;
+pub const tSend: ruby_method_ids = 192;
+pub const t__send__: ruby_method_ids = 193;
+pub const t__attached__: ruby_method_ids = 194;
+pub const t__recursive_key__: ruby_method_ids = 195;
+pub const tInitialize: ruby_method_ids = 196;
+pub const tInitialize_copy: ruby_method_ids = 197;
+pub const tInitialize_clone: ruby_method_ids = 198;
+pub const tInitialize_dup: ruby_method_ids = 199;
+pub const tTo_int: ruby_method_ids = 200;
+pub const tTo_ary: ruby_method_ids = 201;
+pub const tTo_str: ruby_method_ids = 202;
+pub const tTo_sym: ruby_method_ids = 203;
+pub const tTo_hash: ruby_method_ids = 204;
+pub const tTo_proc: ruby_method_ids = 205;
+pub const tTo_io: ruby_method_ids = 206;
+pub const tTo_a: ruby_method_ids = 207;
+pub const tTo_s: ruby_method_ids = 208;
+pub const tTo_i: ruby_method_ids = 209;
+pub const tTo_f: ruby_method_ids = 210;
+pub const tTo_r: ruby_method_ids = 211;
+pub const tBt: ruby_method_ids = 212;
+pub const tBt_locations: ruby_method_ids = 213;
+pub const tCall: ruby_method_ids = 214;
+pub const tMesg: ruby_method_ids = 215;
+pub const tException: ruby_method_ids = 216;
+pub const tLocals: ruby_method_ids = 217;
+pub const tNOT: ruby_method_ids = 218;
+pub const tAND: ruby_method_ids = 219;
+pub const tOR: ruby_method_ids = 220;
+pub const tDiv: ruby_method_ids = 221;
+pub const tDivmod: ruby_method_ids = 222;
+pub const tFdiv: ruby_method_ids = 223;
+pub const tQuo: ruby_method_ids = 224;
+pub const tName: ruby_method_ids = 225;
+pub const tNil: ruby_method_ids = 226;
+pub const tUScore: ruby_method_ids = 227;
+pub const tNUMPARAM_1: ruby_method_ids = 228;
+pub const tNUMPARAM_2: ruby_method_ids = 229;
+pub const tNUMPARAM_3: ruby_method_ids = 230;
+pub const tNUMPARAM_4: ruby_method_ids = 231;
+pub const tNUMPARAM_5: ruby_method_ids = 232;
+pub const tNUMPARAM_6: ruby_method_ids = 233;
+pub const tNUMPARAM_7: ruby_method_ids = 234;
+pub const tNUMPARAM_8: ruby_method_ids = 235;
+pub const tNUMPARAM_9: ruby_method_ids = 236;
+pub const tTOKEN_LOCAL_END: ruby_method_ids = 237;
+pub const tTOKEN_INSTANCE_BEGIN: ruby_method_ids = 236;
+pub const tTOKEN_INSTANCE_END: ruby_method_ids = 237;
+pub const tTOKEN_GLOBAL_BEGIN: ruby_method_ids = 236;
+pub const tLASTLINE: ruby_method_ids = 237;
+pub const tBACKREF: ruby_method_ids = 238;
+pub const tERROR_INFO: ruby_method_ids = 239;
+pub const tTOKEN_GLOBAL_END: ruby_method_ids = 240;
+pub const tTOKEN_CONST_BEGIN: ruby_method_ids = 239;
+pub const tTOKEN_CONST_END: ruby_method_ids = 240;
+pub const tTOKEN_CLASS_BEGIN: ruby_method_ids = 239;
+pub const tTOKEN_CLASS_END: ruby_method_ids = 240;
+pub const tTOKEN_ATTRSET_BEGIN: ruby_method_ids = 239;
+pub const tTOKEN_ATTRSET_END: ruby_method_ids = 240;
+pub const tNEXT_ID: ruby_method_ids = 240;
+pub const idMax: ruby_method_ids = 2721;
+pub const idMin: ruby_method_ids = 2737;
+pub const idFreeze: ruby_method_ids = 2753;
+pub const idInspect: ruby_method_ids = 2769;
+pub const idIntern: ruby_method_ids = 2785;
+pub const idObject_id: ruby_method_ids = 2801;
+pub const idConst_added: ruby_method_ids = 2817;
+pub const idConst_missing: ruby_method_ids = 2833;
+pub const idMethodMissing: ruby_method_ids = 2849;
+pub const idMethod_added: ruby_method_ids = 2865;
+pub const idSingleton_method_added: ruby_method_ids = 2881;
+pub const idMethod_removed: ruby_method_ids = 2897;
+pub const idSingleton_method_removed: ruby_method_ids = 2913;
+pub const idMethod_undefined: ruby_method_ids = 2929;
+pub const idSingleton_method_undefined: ruby_method_ids = 2945;
+pub const idLength: ruby_method_ids = 2961;
+pub const idSize: ruby_method_ids = 2977;
+pub const idGets: ruby_method_ids = 2993;
+pub const idSucc: ruby_method_ids = 3009;
+pub const idEach: ruby_method_ids = 3025;
+pub const idProc: ruby_method_ids = 3041;
+pub const idLambda: ruby_method_ids = 3057;
+pub const idSend: ruby_method_ids = 3073;
+pub const id__send__: ruby_method_ids = 3089;
+pub const id__attached__: ruby_method_ids = 3105;
+pub const id__recursive_key__: ruby_method_ids = 3121;
+pub const idInitialize: ruby_method_ids = 3137;
+pub const idInitialize_copy: ruby_method_ids = 3153;
+pub const idInitialize_clone: ruby_method_ids = 3169;
+pub const idInitialize_dup: ruby_method_ids = 3185;
+pub const idTo_int: ruby_method_ids = 3201;
+pub const idTo_ary: ruby_method_ids = 3217;
+pub const idTo_str: ruby_method_ids = 3233;
+pub const idTo_sym: ruby_method_ids = 3249;
+pub const idTo_hash: ruby_method_ids = 3265;
+pub const idTo_proc: ruby_method_ids = 3281;
+pub const idTo_io: ruby_method_ids = 3297;
+pub const idTo_a: ruby_method_ids = 3313;
+pub const idTo_s: ruby_method_ids = 3329;
+pub const idTo_i: ruby_method_ids = 3345;
+pub const idTo_f: ruby_method_ids = 3361;
+pub const idTo_r: ruby_method_ids = 3377;
+pub const idBt: ruby_method_ids = 3393;
+pub const idBt_locations: ruby_method_ids = 3409;
+pub const idCall: ruby_method_ids = 3425;
+pub const idMesg: ruby_method_ids = 3441;
+pub const idException: ruby_method_ids = 3457;
+pub const idLocals: ruby_method_ids = 3473;
+pub const idNOT: ruby_method_ids = 3489;
+pub const idAND: ruby_method_ids = 3505;
+pub const idOR: ruby_method_ids = 3521;
+pub const idDiv: ruby_method_ids = 3537;
+pub const idDivmod: ruby_method_ids = 3553;
+pub const idFdiv: ruby_method_ids = 3569;
+pub const idQuo: ruby_method_ids = 3585;
+pub const idName: ruby_method_ids = 3601;
+pub const idNil: ruby_method_ids = 3617;
+pub const idUScore: ruby_method_ids = 3633;
+pub const idNUMPARAM_1: ruby_method_ids = 3649;
+pub const idNUMPARAM_2: ruby_method_ids = 3665;
+pub const idNUMPARAM_3: ruby_method_ids = 3681;
+pub const idNUMPARAM_4: ruby_method_ids = 3697;
+pub const idNUMPARAM_5: ruby_method_ids = 3713;
+pub const idNUMPARAM_6: ruby_method_ids = 3729;
+pub const idNUMPARAM_7: ruby_method_ids = 3745;
+pub const idNUMPARAM_8: ruby_method_ids = 3761;
+pub const idNUMPARAM_9: ruby_method_ids = 3777;
+pub const idLASTLINE: ruby_method_ids = 3799;
+pub const idBACKREF: ruby_method_ids = 3815;
+pub const idERROR_INFO: ruby_method_ids = 3831;
+pub const tLAST_OP_ID: ruby_method_ids = 169;
+pub const idLAST_OP_ID: ruby_method_ids = 10;
+pub type ruby_method_ids = u32;
+extern "C" {
+ pub fn rb_ary_tmp_new_from_values(
+ arg1: VALUE,
+ arg2: ::std::os::raw::c_long,
+ arg3: *const VALUE,
+ ) -> VALUE;
+}
+extern "C" {
+ pub fn rb_ec_ary_new_from_values(
+ ec: *mut rb_execution_context_struct,
+ n: ::std::os::raw::c_long,
+ elts: *const VALUE,
+ ) -> VALUE;
+}
+pub type rb_serial_t = ::std::os::raw::c_ulonglong;
+extern "C" {
+ pub fn rb_class_allocate_instance(klass: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_obj_info(obj: VALUE) -> *const ::std::os::raw::c_char;
+}
+pub const imemo_env: imemo_type = 0;
+pub const imemo_cref: imemo_type = 1;
+pub const imemo_svar: imemo_type = 2;
+pub const imemo_throw_data: imemo_type = 3;
+pub const imemo_ifunc: imemo_type = 4;
+pub const imemo_memo: imemo_type = 5;
+pub const imemo_ment: imemo_type = 6;
+pub const imemo_iseq: imemo_type = 7;
+pub const imemo_tmpbuf: imemo_type = 8;
+pub const imemo_ast: imemo_type = 9;
+pub const imemo_parser_strterm: imemo_type = 10;
+pub const imemo_callinfo: imemo_type = 11;
+pub const imemo_callcache: imemo_type = 12;
+pub const imemo_constcache: imemo_type = 13;
+pub type imemo_type = u32;
+pub const METHOD_VISI_UNDEF: rb_method_visibility_t = 0;
+pub const METHOD_VISI_PUBLIC: rb_method_visibility_t = 1;
+pub const METHOD_VISI_PRIVATE: rb_method_visibility_t = 2;
+pub const METHOD_VISI_PROTECTED: rb_method_visibility_t = 3;
+pub const METHOD_VISI_MASK: rb_method_visibility_t = 3;
+pub type rb_method_visibility_t = u32;
+#[repr(C)]
+pub struct rb_method_entry_struct {
+ pub flags: VALUE,
+ pub defined_class: VALUE,
+ pub def: *mut rb_method_definition_struct,
+ pub called_id: ID,
+ pub owner: VALUE,
+}
+pub type rb_method_entry_t = rb_method_entry_struct;
+#[repr(C)]
+pub struct rb_callable_method_entry_struct {
+ pub flags: VALUE,
+ pub defined_class: VALUE,
+ pub def: *mut rb_method_definition_struct,
+ pub called_id: ID,
+ pub owner: VALUE,
+}
+pub type rb_callable_method_entry_t = rb_callable_method_entry_struct;
+pub const VM_METHOD_TYPE_ISEQ: rb_method_type_t = 0;
+pub const VM_METHOD_TYPE_CFUNC: rb_method_type_t = 1;
+pub const VM_METHOD_TYPE_ATTRSET: rb_method_type_t = 2;
+pub const VM_METHOD_TYPE_IVAR: rb_method_type_t = 3;
+pub const VM_METHOD_TYPE_BMETHOD: rb_method_type_t = 4;
+pub const VM_METHOD_TYPE_ZSUPER: rb_method_type_t = 5;
+pub const VM_METHOD_TYPE_ALIAS: rb_method_type_t = 6;
+pub const VM_METHOD_TYPE_UNDEF: rb_method_type_t = 7;
+pub const VM_METHOD_TYPE_NOTIMPLEMENTED: rb_method_type_t = 8;
+pub const VM_METHOD_TYPE_OPTIMIZED: rb_method_type_t = 9;
+pub const VM_METHOD_TYPE_MISSING: rb_method_type_t = 10;
+pub const VM_METHOD_TYPE_REFINED: rb_method_type_t = 11;
+pub type rb_method_type_t = u32;
+pub const OPTIMIZED_METHOD_TYPE_SEND: method_optimized_type = 0;
+pub const OPTIMIZED_METHOD_TYPE_CALL: method_optimized_type = 1;
+pub const OPTIMIZED_METHOD_TYPE_BLOCK_CALL: method_optimized_type = 2;
+pub const OPTIMIZED_METHOD_TYPE_STRUCT_AREF: method_optimized_type = 3;
+pub const OPTIMIZED_METHOD_TYPE_STRUCT_ASET: method_optimized_type = 4;
+pub const OPTIMIZED_METHOD_TYPE__MAX: method_optimized_type = 5;
+pub type method_optimized_type = u32;
+extern "C" {
+ pub fn rb_method_entry_at(obj: VALUE, id: ID) -> *const rb_method_entry_t;
+}
+extern "C" {
+ pub fn rb_callable_method_entry(klass: VALUE, id: ID) -> *const rb_callable_method_entry_t;
+}
+pub type rb_num_t = ::std::os::raw::c_ulong;
+#[repr(C)]
+pub struct iseq_inline_constant_cache_entry {
+ pub flags: VALUE,
+ pub value: VALUE,
+ pub _unused1: VALUE,
+ pub _unused2: VALUE,
+ pub ic_cref: *const rb_cref_t,
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct iseq_inline_constant_cache {
+ pub entry: *mut iseq_inline_constant_cache_entry,
+ pub get_insn_idx: ::std::os::raw::c_uint,
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct iseq_inline_iv_cache_entry {
+ pub entry: *mut rb_iv_index_tbl_entry,
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct iseq_inline_cvar_cache_entry {
+ pub entry: *mut rb_cvar_class_tbl_entry,
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword {
+ pub num: ::std::os::raw::c_int,
+ pub required_num: ::std::os::raw::c_int,
+ pub bits_start: ::std::os::raw::c_int,
+ pub rest_start: ::std::os::raw::c_int,
+ pub table: *const ID,
+ pub default_values: *mut VALUE,
+}
+pub const BOP_PLUS: ruby_basic_operators = 0;
+pub const BOP_MINUS: ruby_basic_operators = 1;
+pub const BOP_MULT: ruby_basic_operators = 2;
+pub const BOP_DIV: ruby_basic_operators = 3;
+pub const BOP_MOD: ruby_basic_operators = 4;
+pub const BOP_EQ: ruby_basic_operators = 5;
+pub const BOP_EQQ: ruby_basic_operators = 6;
+pub const BOP_LT: ruby_basic_operators = 7;
+pub const BOP_LE: ruby_basic_operators = 8;
+pub const BOP_LTLT: ruby_basic_operators = 9;
+pub const BOP_AREF: ruby_basic_operators = 10;
+pub const BOP_ASET: ruby_basic_operators = 11;
+pub const BOP_LENGTH: ruby_basic_operators = 12;
+pub const BOP_SIZE: ruby_basic_operators = 13;
+pub const BOP_EMPTY_P: ruby_basic_operators = 14;
+pub const BOP_NIL_P: ruby_basic_operators = 15;
+pub const BOP_SUCC: ruby_basic_operators = 16;
+pub const BOP_GT: ruby_basic_operators = 17;
+pub const BOP_GE: ruby_basic_operators = 18;
+pub const BOP_NOT: ruby_basic_operators = 19;
+pub const BOP_NEQ: ruby_basic_operators = 20;
+pub const BOP_MATCH: ruby_basic_operators = 21;
+pub const BOP_FREEZE: ruby_basic_operators = 22;
+pub const BOP_UMINUS: ruby_basic_operators = 23;
+pub const BOP_MAX: ruby_basic_operators = 24;
+pub const BOP_MIN: ruby_basic_operators = 25;
+pub const BOP_CALL: ruby_basic_operators = 26;
+pub const BOP_AND: ruby_basic_operators = 27;
+pub const BOP_OR: ruby_basic_operators = 28;
+pub const BOP_LAST_: ruby_basic_operators = 29;
+pub type ruby_basic_operators = u32;
+pub type rb_control_frame_t = rb_control_frame_struct;
+extern "C" {
+ pub static mut rb_mRubyVMFrozenCore: VALUE;
+}
+extern "C" {
+ pub static mut rb_block_param_proxy: VALUE;
+}
+pub type IC = *mut iseq_inline_constant_cache;
+pub type IVC = *mut iseq_inline_iv_cache_entry;
+pub type ICVARC = *mut iseq_inline_cvar_cache_entry;
+pub const VM_FRAME_MAGIC_METHOD: vm_frame_env_flags = 286326785;
+pub const VM_FRAME_MAGIC_BLOCK: vm_frame_env_flags = 572653569;
+pub const VM_FRAME_MAGIC_CLASS: vm_frame_env_flags = 858980353;
+pub const VM_FRAME_MAGIC_TOP: vm_frame_env_flags = 1145307137;
+pub const VM_FRAME_MAGIC_CFUNC: vm_frame_env_flags = 1431633921;
+pub const VM_FRAME_MAGIC_IFUNC: vm_frame_env_flags = 1717960705;
+pub const VM_FRAME_MAGIC_EVAL: vm_frame_env_flags = 2004287489;
+pub const VM_FRAME_MAGIC_RESCUE: vm_frame_env_flags = 2022178817;
+pub const VM_FRAME_MAGIC_DUMMY: vm_frame_env_flags = 2040070145;
+pub const VM_FRAME_MAGIC_MASK: vm_frame_env_flags = 2147418113;
+pub const VM_FRAME_FLAG_FINISH: vm_frame_env_flags = 32;
+pub const VM_FRAME_FLAG_BMETHOD: vm_frame_env_flags = 64;
+pub const VM_FRAME_FLAG_CFRAME: vm_frame_env_flags = 128;
+pub const VM_FRAME_FLAG_LAMBDA: vm_frame_env_flags = 256;
+pub const VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM: vm_frame_env_flags = 512;
+pub const VM_FRAME_FLAG_CFRAME_KW: vm_frame_env_flags = 1024;
+pub const VM_FRAME_FLAG_PASSED: vm_frame_env_flags = 2048;
+pub const VM_ENV_FLAG_LOCAL: vm_frame_env_flags = 2;
+pub const VM_ENV_FLAG_ESCAPED: vm_frame_env_flags = 4;
+pub const VM_ENV_FLAG_WB_REQUIRED: vm_frame_env_flags = 8;
+pub const VM_ENV_FLAG_ISOLATED: vm_frame_env_flags = 16;
+pub type vm_frame_env_flags = u32;
+extern "C" {
+ pub fn rb_vm_frame_method_entry(
+ cfp: *const rb_control_frame_t,
+ ) -> *const rb_callable_method_entry_t;
+}
+pub const VM_CALL_ARGS_SPLAT_bit: vm_call_flag_bits = 0;
+pub const VM_CALL_ARGS_BLOCKARG_bit: vm_call_flag_bits = 1;
+pub const VM_CALL_FCALL_bit: vm_call_flag_bits = 2;
+pub const VM_CALL_VCALL_bit: vm_call_flag_bits = 3;
+pub const VM_CALL_ARGS_SIMPLE_bit: vm_call_flag_bits = 4;
+pub const VM_CALL_BLOCKISEQ_bit: vm_call_flag_bits = 5;
+pub const VM_CALL_KWARG_bit: vm_call_flag_bits = 6;
+pub const VM_CALL_KW_SPLAT_bit: vm_call_flag_bits = 7;
+pub const VM_CALL_TAILCALL_bit: vm_call_flag_bits = 8;
+pub const VM_CALL_SUPER_bit: vm_call_flag_bits = 9;
+pub const VM_CALL_ZSUPER_bit: vm_call_flag_bits = 10;
+pub const VM_CALL_OPT_SEND_bit: vm_call_flag_bits = 11;
+pub const VM_CALL_KW_SPLAT_MUT_bit: vm_call_flag_bits = 12;
+pub const VM_CALL__END: vm_call_flag_bits = 13;
+pub type vm_call_flag_bits = u32;
+#[repr(C)]
+pub struct rb_callinfo {
+ pub flags: VALUE,
+ pub kwarg: *const rb_callinfo_kwarg,
+ pub mid: VALUE,
+ pub flag: VALUE,
+ pub argc: VALUE,
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct rb_call_data {
+ pub ci: *const rb_callinfo,
+ pub cc: *const rb_callcache,
+}
+extern "C" {
+ pub fn rb_obj_as_string_result(str_: VALUE, obj: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_str_concat_literals(num: size_t, strary: *const VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_ec_str_resurrect(ec: *mut rb_execution_context_struct, str_: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_hash_new_with_size(size: st_index_t) -> VALUE;
+}
+extern "C" {
+ pub fn rb_hash_resurrect(hash: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_obj_ensure_iv_index_mapping(obj: VALUE, id: ID) -> u32;
+}
+extern "C" {
+ pub fn rb_gvar_get(arg1: ID) -> VALUE;
+}
+extern "C" {
+ pub fn rb_gvar_set(arg1: ID, arg2: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_vm_insn_decode(encoded: VALUE) -> ::std::os::raw::c_int;
+}
+#[repr(C)]
+pub struct rb_iv_index_tbl_entry {
+ pub index: u32,
+ pub class_serial: rb_serial_t,
+ pub class_value: VALUE,
+}
+#[repr(C)]
+pub struct rb_cvar_class_tbl_entry {
+ pub index: u32,
+ pub global_cvar_state: rb_serial_t,
+ pub class_value: VALUE,
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct rb_builtin_function {
+ pub func_ptr: *const ::std::os::raw::c_void,
+ pub argc: ::std::os::raw::c_int,
+ pub index: ::std::os::raw::c_int,
+ pub name: *const ::std::os::raw::c_char,
+ pub compiler: ::std::option::Option<
+ unsafe extern "C" fn(
+ arg1: *mut FILE,
+ arg2: ::std::os::raw::c_long,
+ arg3: ::std::os::raw::c_uint,
+ arg4: bool,
+ ),
+ >,
+}
+extern "C" {
+ pub fn rb_vm_insn_addr2opcode(addr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int;
+}
+pub type rb_iseq_each_i = ::std::option::Option<
+ unsafe extern "C" fn(
+ code: *mut VALUE,
+ insn: VALUE,
+ index: size_t,
+ data: *mut ::std::os::raw::c_void,
+ ) -> bool,
+>;
+extern "C" {
+ pub fn rb_iseq_each(
+ iseq: *const rb_iseq_t,
+ start_index: size_t,
+ iterator: rb_iseq_each_i,
+ data: *mut ::std::os::raw::c_void,
+ );
+}
+extern "C" {
+ pub fn rb_iseqw_to_iseq(iseqw: VALUE) -> *const rb_iseq_t;
+}
+extern "C" {
+ pub fn rb_vm_barrier();
+}
+extern "C" {
+ pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32);
+}
+extern "C" {
+ pub fn rb_yjit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32);
+}
+extern "C" {
+ pub fn rb_yjit_get_page_size() -> u32;
+}
+extern "C" {
+ pub fn rb_c_method_tracing_currently_enabled(ec: *mut rb_execution_context_t) -> bool;
+}
+extern "C" {
+ pub fn rb_full_cfunc_return(ec: *mut rb_execution_context_t, return_value: VALUE);
+}
+extern "C" {
+ pub fn rb_iseq_get_yjit_payload(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_void;
+}
+extern "C" {
+ pub fn rb_iseq_set_yjit_payload(iseq: *const rb_iseq_t, payload: *mut ::std::os::raw::c_void);
+}
+extern "C" {
+ pub fn rb_iseq_reset_jit_func(iseq: *const rb_iseq_t);
+}
+extern "C" {
+ pub fn rb_iseq_pc_at_idx(iseq: *const rb_iseq_t, insn_idx: u32) -> *mut VALUE;
+}
+extern "C" {
+ pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int;
+}
+pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword;
+extern "C" {
+ pub fn rb_leaf_invokebuiltin_iseq_p(iseq: *const rb_iseq_t) -> bool;
+}
+extern "C" {
+ pub fn rb_leaf_builtin_function(iseq: *const rb_iseq_t) -> *const rb_builtin_function;
+}
+extern "C" {
+ pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE);
+}
+extern "C" {
+ pub fn rb_set_cfp_sp(cfp: *mut rb_control_frame_struct, sp: *mut VALUE);
+}
+extern "C" {
+ pub fn rb_cfp_get_iseq(cfp: *mut rb_control_frame_struct) -> *mut rb_iseq_t;
+}
+extern "C" {
+ pub fn rb_yjit_dump_iseq_loc(iseq: *const rb_iseq_t, insn_idx: u32);
+}
+extern "C" {
+ pub fn rb_yjit_multi_ractor_p() -> bool;
+}
+extern "C" {
+ pub fn rb_assert_iseq_handle(handle: VALUE);
+}
+extern "C" {
+ pub fn rb_IMEMO_TYPE_P(imemo: VALUE, imemo_type: imemo_type) -> ::std::os::raw::c_int;
+}
+extern "C" {
+ pub fn rb_assert_cme_handle(handle: VALUE);
+}
+pub type iseq_callback = ::std::option::Option<unsafe extern "C" fn(arg1: *const rb_iseq_t)>;
+extern "C" {
+ pub fn rb_yjit_for_each_iseq(callback: iseq_callback);
+}
+extern "C" {
+ pub fn rb_yjit_obj_written(
+ old: VALUE,
+ young: VALUE,
+ file: *const ::std::os::raw::c_char,
+ line: ::std::os::raw::c_int,
+ );
+}
+extern "C" {
+ pub fn rb_yjit_vm_lock_then_barrier(
+ recursive_lock_level: *mut ::std::os::raw::c_uint,
+ file: *const ::std::os::raw::c_char,
+ line: ::std::os::raw::c_int,
+ );
+}
+extern "C" {
+ pub fn rb_yjit_vm_unlock(
+ recursive_lock_level: *mut ::std::os::raw::c_uint,
+ file: *const ::std::os::raw::c_char,
+ line: ::std::os::raw::c_int,
+ );
+}
diff --git a/yjit/src/disasm.rs b/yjit/src/disasm.rs
new file mode 100644
index 0000000000..97edc786bc
--- /dev/null
+++ b/yjit/src/disasm.rs
@@ -0,0 +1,218 @@
+use crate::asm::*;
+use crate::codegen::*;
+use crate::core::*;
+use crate::cruby::*;
+use crate::yjit::yjit_enabled_p;
+use std::fmt::Write;
+
+/// Primitive called in yjit.rb
+/// Produce a string representing the disassembly for an ISEQ
+#[no_mangle]
+pub extern "C" fn rb_yjit_disasm_iseq(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALUE) -> VALUE {
+ #[cfg(not(feature = "disasm"))]
+ {
+ let _ = iseqw;
+ return Qnil;
+ }
+
+ #[cfg(feature = "disasm")]
+ {
+ // TODO:
+ //if unsafe { CLASS_OF(iseqw) != rb_cISeq } {
+ // return Qnil;
+ //}
+
+ if !yjit_enabled_p() {
+ return Qnil;
+ }
+
+ // Get the iseq pointer from the wrapper
+ let iseq = unsafe { rb_iseqw_to_iseq(iseqw) };
+
+ let out_string = disasm_iseq(iseq);
+
+ return rust_str_to_ruby(&out_string);
+ }
+}
+
+#[cfg(feature = "disasm")]
+fn disasm_iseq(iseq: IseqPtr) -> String {
+ let mut out = String::from("");
+
+ // Get a list of block versions generated for this iseq
+ let mut block_list = get_iseq_block_list(iseq);
+
+    // Get the global inline code block that holds the generated code
+ let global_cb = CodegenGlobals::get_inline_cb();
+
+ // Sort the blocks by increasing start addresses
+ block_list.sort_by(|a, b| {
+ use std::cmp::Ordering;
+
+ // Get the start addresses for each block
+ let addr_a = a.borrow().get_start_addr().unwrap().raw_ptr();
+ let addr_b = b.borrow().get_start_addr().unwrap().raw_ptr();
+
+ if addr_a < addr_b {
+ Ordering::Less
+ } else if addr_a == addr_b {
+ Ordering::Equal
+ } else {
+ Ordering::Greater
+ }
+ });
+
+ // Compute total code size in bytes for all blocks in the function
+ let mut total_code_size = 0;
+ for blockref in &block_list {
+ total_code_size += blockref.borrow().code_size();
+ }
+
+ // Initialize capstone
+ extern crate capstone;
+ use capstone::prelude::*;
+ let cs = Capstone::new()
+ .x86()
+ .mode(arch::x86::ArchMode::Mode64)
+ .syntax(arch::x86::ArchSyntax::Intel)
+ .build()
+ .unwrap();
+
+ out.push_str(&format!("NUM BLOCK VERSIONS: {}\n", block_list.len()));
+ out.push_str(&format!(
+ "TOTAL INLINE CODE SIZE: {} bytes\n",
+ total_code_size
+ ));
+
+ // For each block, sorted by increasing start address
+ for block_idx in 0..block_list.len() {
+ let block = block_list[block_idx].borrow();
+ let blockid = block.get_blockid();
+ let end_idx = block.get_end_idx();
+ let start_addr = block.get_start_addr().unwrap().raw_ptr();
+ let end_addr = block.get_end_addr().unwrap().raw_ptr();
+ let code_size = block.code_size();
+
+ // Write some info about the current block
+ let block_ident = format!(
+ "BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ",
+ block_idx + 1,
+ block_list.len(),
+ blockid.idx,
+ end_idx,
+ code_size
+ );
+ out.push_str(&format!("== {:=<60}\n", block_ident));
+
+ // Disassemble the instructions
+ let code_slice = unsafe { std::slice::from_raw_parts(start_addr, code_size) };
+ let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap();
+
+ // For each instruction in this block
+ for insn in insns.as_ref() {
+            // Print any comments recorded at this instruction's address
+ if let Some(comment_list) = global_cb.comments_at(insn.address() as usize) {
+ for comment in comment_list {
+ out.push_str(&format!(" \x1b[1m# {}\x1b[0m\n", comment));
+ }
+ }
+ out.push_str(&format!(" {}\n", insn));
+ }
+
+ // If this is not the last block
+ if block_idx < block_list.len() - 1 {
+ // Compute the size of the gap between this block and the next
+ let next_block = block_list[block_idx + 1].borrow();
+ let next_start_addr = next_block.get_start_addr().unwrap().raw_ptr();
+ let gap_size = (next_start_addr as usize) - (end_addr as usize);
+
+ // Log the size of the gap between the blocks if nonzero
+ if gap_size > 0 {
+ out.push_str(&format!("... {} byte gap ...\n", gap_size));
+ }
+ }
+ }
+
+ return out;
+}
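+
+// Output shape sketch for disasm_iseq() (values illustrative; the exact
+// disassembly depends on the generated code):
+//
+//   NUM BLOCK VERSIONS: 2
+//   TOTAL INLINE CODE SIZE: 123 bytes
+//   == BLOCK 1/2, ISEQ RANGE [0,8), 57 bytes ===================
+//     # comment recorded in the code block at this address
+//     mov rax, qword ptr [r13 + 0x10]
+//   ... 7 byte gap ...
+//   == BLOCK 2/2, ISEQ RANGE [8,11), 66 bytes ==================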
+
+/// Primitive called in yjit.rb
+/// Produce a list of instructions compiled for an iseq
+#[no_mangle]
+pub extern "C" fn rb_yjit_insns_compiled(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALUE) -> VALUE {
+ {
+ // TODO:
+ //if unsafe { CLASS_OF(iseqw) != rb_cISeq } {
+ // return Qnil;
+ //}
+
+ if !yjit_enabled_p() {
+ return Qnil;
+ }
+
+ // Get the iseq pointer from the wrapper
+ let iseq = unsafe { rb_iseqw_to_iseq(iseqw) };
+
+ // Get the list of instructions compiled
+ let insn_vec = insns_compiled(iseq);
+
+ unsafe {
+ let insn_ary = rb_ary_new_capa((insn_vec.len() * 2) as i64);
+
+ // For each instruction compiled
+ for idx in 0..insn_vec.len() {
+ let op_name = &insn_vec[idx].0;
+ let insn_idx = insn_vec[idx].1;
+
+ let op_sym = rust_str_to_sym(&op_name);
+
+ // Store the instruction index and opcode symbol
+ rb_ary_store(
+ insn_ary,
+ (2 * idx + 0) as i64,
+ VALUE::fixnum_from_usize(insn_idx as usize),
+ );
+ rb_ary_store(insn_ary, (2 * idx + 1) as i64, op_sym);
+ }
+
+ insn_ary
+ }
+ }
+}
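+
+// Return value sketch: a flat Ruby array alternating instruction index and opcode
+// symbol, e.g. [0, :putself, 1, :opt_send_without_block, 3, :leave] (illustrative).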
+
+fn insns_compiled(iseq: IseqPtr) -> Vec<(String, u32)> {
+ let mut insn_vec = Vec::new();
+
+ // Get a list of block versions generated for this iseq
+ let block_list = get_iseq_block_list(iseq);
+
+ // For each block associated with this iseq
+ for blockref in &block_list {
+ let block = blockref.borrow();
+ let start_idx = block.get_blockid().idx;
+ let end_idx = block.get_end_idx();
+ assert!(end_idx <= unsafe { get_iseq_encoded_size(iseq) });
+
+ // For each YARV instruction in the block
+ let mut insn_idx = start_idx;
+ while insn_idx < end_idx {
+ // Get the current pc and opcode
+ let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) };
+ // try_into() call below is unfortunate. Maybe pick i32 instead of usize for opcodes.
+ let opcode: usize = unsafe { rb_iseq_opcode_at_pc(iseq, pc) }
+ .try_into()
+ .unwrap();
+
+ // Get the mnemonic for this opcode
+ let op_name = insn_name(opcode);
+
+ // Add the instruction to the list
+ insn_vec.push((op_name, insn_idx));
+
+ // Move to the next instruction
+ insn_idx += insn_len(opcode);
+ }
+ }
+
+ return insn_vec;
+}
diff --git a/yjit/src/invariants.rs b/yjit/src/invariants.rs
new file mode 100644
index 0000000000..262121a488
--- /dev/null
+++ b/yjit/src/invariants.rs
@@ -0,0 +1,585 @@
+//! Code to track assumptions made during code generation and invalidate
+//! generated code if and when these assumptions are invalidated.
+
+use crate::asm::OutlinedCb;
+use crate::codegen::*;
+use crate::core::*;
+use crate::cruby::*;
+use crate::options::*;
+use crate::stats::*;
+use crate::utils::IntoUsize;
+use crate::yjit::yjit_enabled_p;
+
+use std::collections::{HashMap, HashSet};
+use std::mem;
+use std::os::raw::c_void;
+
+// Invariants to track:
+// assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS)
+// assume_method_lookup_stable(comptime_recv_klass, cme, jit);
+// assume_single_ractor_mode(jit)
+// assume_stable_global_constant_state(jit);
+
+/// Used to track all of the various block references that contain assumptions
+/// about the state of the virtual machine.
+pub struct Invariants {
+ /// Tracks block assumptions about callable method entry validity.
+ cme_validity: HashMap<*const rb_callable_method_entry_t, HashSet<BlockRef>>,
+
+    /// Tracks block assumptions about method lookup. Maps a class to a table
+    /// from method ID to a set of blocks. While a block `b` is in the table,
+ /// b->callee_cme == rb_callable_method_entry(klass, mid).
+ method_lookup: HashMap<VALUE, HashMap<ID, HashSet<BlockRef>>>,
+
+ /// A map from a class and its associated basic operator to a set of blocks
+ /// that are assuming that that operator is not redefined. This is used for
+ /// quick access to all of the blocks that are making this assumption when
+ /// the operator is redefined.
+ basic_operator_blocks: HashMap<(RedefinitionFlag, ruby_basic_operators), HashSet<BlockRef>>,
+
+ /// A map from a block to a set of classes and their associated basic
+ /// operators that the block is assuming are not redefined. This is used for
+ /// quick access to all of the assumptions that a block is making when it
+ /// needs to be invalidated.
+ block_basic_operators: HashMap<BlockRef, HashSet<(RedefinitionFlag, ruby_basic_operators)>>,
+
+ /// Tracks the set of blocks that are assuming the interpreter is running
+ /// with only one ractor. This is important for things like accessing
+ /// constants which can have different semantics when multiple ractors are
+ /// running.
+ single_ractor: HashSet<BlockRef>,
+
+ /// A map from an ID to the set of blocks that are assuming a constant with
+ /// that ID as part of its name has not been redefined. For example, if
+ /// a constant `A::B` is redefined, then all blocks that are assuming that
+    /// `A` and `B` have not been redefined must be invalidated.
+ constant_state_blocks: HashMap<ID, HashSet<BlockRef>>,
+
+ /// A map from a block to a set of IDs that it is assuming have not been
+ /// redefined.
+ block_constant_states: HashMap<BlockRef, HashSet<ID>>,
+}
+
+/// Private singleton instance of the invariants global struct.
+static mut INVARIANTS: Option<Invariants> = None;
+
+impl Invariants {
+ pub fn init() {
+ // Wrapping this in unsafe to assign directly to a global.
+ unsafe {
+ INVARIANTS = Some(Invariants {
+ cme_validity: HashMap::new(),
+ method_lookup: HashMap::new(),
+ basic_operator_blocks: HashMap::new(),
+ block_basic_operators: HashMap::new(),
+ single_ractor: HashSet::new(),
+ constant_state_blocks: HashMap::new(),
+ block_constant_states: HashMap::new(),
+ });
+ }
+ }
+
+    /// Get a mutable reference to the global invariants instance
+ pub fn get_instance() -> &'static mut Invariants {
+ unsafe { INVARIANTS.as_mut().unwrap() }
+ }
+}
+
+/// A public function that can be called from within the code generation
+/// functions to ensure that the block being generated is invalidated when the
+/// basic operator is redefined.
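+///
+/// Call-site sketch: a specialization of Integer#+ might guard itself with
+/// `assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS)`
+/// and skip the fast path when this returns `false`.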
+pub fn assume_bop_not_redefined(
+ jit: &mut JITState,
+ ocb: &mut OutlinedCb,
+ klass: RedefinitionFlag,
+ bop: ruby_basic_operators,
+) -> bool {
+ if unsafe { BASIC_OP_UNREDEFINED_P(bop, klass) } {
+ jit_ensure_block_entry_exit(jit, ocb);
+
+ let invariants = Invariants::get_instance();
+ invariants
+ .basic_operator_blocks
+ .entry((klass, bop))
+ .or_insert(HashSet::new())
+ .insert(jit.get_block());
+ invariants
+ .block_basic_operators
+ .entry(jit.get_block())
+ .or_insert(HashSet::new())
+ .insert((klass, bop));
+
+ return true;
+ } else {
+ return false;
+ }
+}
+
+// Remember that a block assumes that
+// `rb_callable_method_entry(receiver_klass, cme->called_id) == cme` and that
+// `cme` is valid.
+// When either of these assumptions becomes invalid, rb_yjit_method_lookup_change() or
+// rb_yjit_cme_invalidate() invalidates the block.
+//
+// @raise NoMemoryError
+pub fn assume_method_lookup_stable(
+ jit: &mut JITState,
+ ocb: &mut OutlinedCb,
+ receiver_klass: VALUE,
+ callee_cme: *const rb_callable_method_entry_t,
+) {
+ // RUBY_ASSERT(rb_callable_method_entry(receiver_klass, cme->called_id) == cme);
+ // RUBY_ASSERT_ALWAYS(RB_TYPE_P(receiver_klass, T_CLASS) || RB_TYPE_P(receiver_klass, T_ICLASS));
+ // RUBY_ASSERT_ALWAYS(!rb_objspace_garbage_object_p(receiver_klass));
+
+ jit_ensure_block_entry_exit(jit, ocb);
+
+ let block = jit.get_block();
+ block
+ .borrow_mut()
+ .add_cme_dependency(receiver_klass, callee_cme);
+
+ Invariants::get_instance()
+ .cme_validity
+ .entry(callee_cme)
+ .or_insert(HashSet::new())
+ .insert(block.clone());
+
+ let mid = unsafe { (*callee_cme).called_id };
+ Invariants::get_instance()
+ .method_lookup
+ .entry(receiver_klass)
+ .or_insert(HashMap::new())
+ .entry(mid)
+ .or_insert(HashSet::new())
+ .insert(block.clone());
+}
+
+/// Tracks that a block is assuming it is operating in single-ractor mode.
+#[must_use]
+pub fn assume_single_ractor_mode(jit: &mut JITState, ocb: &mut OutlinedCb) -> bool {
+ if unsafe { rb_yjit_multi_ractor_p() } {
+ false
+ } else {
+ jit_ensure_block_entry_exit(jit, ocb);
+ Invariants::get_instance()
+ .single_ractor
+ .insert(jit.get_block());
+ true
+ }
+}
+
+/// Walk through the ISEQ to go from the current opt_getinlinecache to the
+/// subsequent opt_setinlinecache and find all of the name components that are
+/// associated with this constant (which correspond to the getconstant
+/// arguments).
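+///
+/// For example, a lookup of `A::B` has `getconstant :A` followed by `getconstant :B`
+/// between the two cache instructions, so the block is registered under both IDs
+/// (bytecode shape sketch; exact operands depend on the compiler).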
+pub fn assume_stable_constant_names(jit: &mut JITState, ocb: &mut OutlinedCb) {
+ /// Tracks that a block is assuming that the name component of a constant
+ /// has not changed since the last call to this function.
+ unsafe extern "C" fn assume_stable_constant_name(
+ code: *mut VALUE,
+ insn: VALUE,
+ index: u64,
+ data: *mut c_void,
+ ) -> bool {
+ if insn.as_usize() == OP_OPT_SETINLINECACHE {
+ return false;
+ }
+
+ if insn.as_usize() == OP_GETCONSTANT {
+ let jit = &mut *(data as *mut JITState);
+
+ // The first operand to GETCONSTANT is always the ID associated with
+ // the constant lookup. We are grabbing this out in order to
+ // associate this block with the stability of this constant name.
+ let id = code.add(index.as_usize() + 1).read().as_u64() as ID;
+
+ let invariants = Invariants::get_instance();
+ invariants
+ .constant_state_blocks
+ .entry(id)
+ .or_insert(HashSet::new())
+ .insert(jit.get_block());
+ invariants
+ .block_constant_states
+ .entry(jit.get_block())
+ .or_insert(HashSet::new())
+ .insert(id);
+ }
+
+ true
+ }
+
+ jit_ensure_block_entry_exit(jit, ocb);
+
+ unsafe {
+ let iseq = jit.get_iseq();
+ let encoded = get_iseq_body_iseq_encoded(iseq);
+ let start_index = jit.get_pc().offset_from(encoded);
+
+ rb_iseq_each(
+ iseq,
+ start_index.try_into().unwrap(),
+ Some(assume_stable_constant_name),
+ jit as *mut _ as *mut c_void,
+ );
+ };
+}
+
+/// Called when a basic operator is redefined. Note that all the blocks assuming
+/// the stability of different operators are invalidated together and we don't
+/// do fine-grained tracking.
+#[no_mangle]
+pub extern "C" fn rb_yjit_bop_redefined(klass: RedefinitionFlag, bop: ruby_basic_operators) {
+ // If YJIT isn't enabled, do nothing
+ if !yjit_enabled_p() {
+ return;
+ }
+
+ with_vm_lock(src_loc!(), || {
+ // Loop through the blocks that are associated with this class and basic
+ // operator and invalidate them.
+ Invariants::get_instance()
+ .basic_operator_blocks
+ .remove(&(klass, bop))
+ .map(|blocks| {
+ for block in blocks.iter() {
+ invalidate_block_version(block);
+ incr_counter!(invalidate_bop_redefined);
+ }
+ });
+ });
+}
+
+/// Callback for when a cme becomes invalid. Invalidate all blocks that depend
+/// on the given cme being valid.
+#[no_mangle]
+pub extern "C" fn rb_yjit_cme_invalidate(callee_cme: *const rb_callable_method_entry_t) {
+ // If YJIT isn't enabled, do nothing
+ if !yjit_enabled_p() {
+ return;
+ }
+
+ with_vm_lock(src_loc!(), || {
+ if let Some(blocks) = Invariants::get_instance().cme_validity.remove(&callee_cme) {
+ for block in blocks.iter() {
+ invalidate_block_version(block);
+ incr_counter!(invalidate_method_lookup);
+ }
+ }
+ });
+}
+
+/// Callback for when rb_callable_method_entry(klass, mid) is going to change.
+/// Invalidate blocks that assume stable method lookup of `mid` in `klass` when this happens.
+/// This needs to be wrapped on the C side with RB_VM_LOCK_ENTER().
+#[no_mangle]
+pub extern "C" fn rb_yjit_method_lookup_change(klass: VALUE, mid: ID) {
+ // If YJIT isn't enabled, do nothing
+ if !yjit_enabled_p() {
+ return;
+ }
+
+ with_vm_lock(src_loc!(), || {
+ Invariants::get_instance()
+ .method_lookup
+ .entry(klass)
+ .and_modify(|deps| {
+ if let Some(deps) = deps.remove(&mid) {
+ for block in &deps {
+ invalidate_block_version(block);
+ incr_counter!(invalidate_method_lookup);
+ }
+ }
+ });
+ });
+}
+
+/// Callback for when Ruby is about to spawn a ractor. In that case we need to
+/// invalidate every block that is assuming single ractor mode.
+#[no_mangle]
+pub extern "C" fn rb_yjit_before_ractor_spawn() {
+ // If YJIT isn't enabled, do nothing
+ if !yjit_enabled_p() {
+ return;
+ }
+
+ with_vm_lock(src_loc!(), || {
+ // Clear the set of blocks inside Invariants
+ let blocks = mem::take(&mut Invariants::get_instance().single_ractor);
+
+ // Invalidate the blocks
+ for block in &blocks {
+ invalidate_block_version(block);
+ incr_counter!(invalidate_ractor_spawn);
+ }
+ });
+}
+
+/// Callback for when the global constant state changes.
+#[no_mangle]
+pub extern "C" fn rb_yjit_constant_state_changed(id: ID) {
+ // If YJIT isn't enabled, do nothing
+ if !yjit_enabled_p() {
+ return;
+ }
+
+ with_vm_lock(src_loc!(), || {
+ if get_option!(global_constant_state) {
+ // If the global-constant-state option is set, then we're going to
+ // invalidate every block that depends on any constant.
+
+ Invariants::get_instance()
+ .constant_state_blocks
+ .keys()
+ .for_each(|id| {
+ if let Some(blocks) =
+ Invariants::get_instance().constant_state_blocks.remove(&id)
+ {
+ for block in &blocks {
+ invalidate_block_version(block);
+ incr_counter!(invalidate_constant_state_bump);
+ }
+ }
+ });
+ } else {
+ // If the global-constant-state option is not set, then we're only going
+ // to invalidate the blocks that are associated with the given ID.
+
+ if let Some(blocks) = Invariants::get_instance().constant_state_blocks.remove(&id) {
+ for block in &blocks {
+ invalidate_block_version(block);
+ incr_counter!(invalidate_constant_state_bump);
+ }
+ }
+ }
+ });
+}
+
+/// Callback for marking GC objects inside [Invariants].
+/// See `struct yjit_root_struct` in C.
+#[no_mangle]
+pub extern "C" fn rb_yjit_root_mark() {
+ // Comment from C YJIT:
+ //
+ // Why not let the GC move the cme keys in this table?
+ // Because this is basically a compare_by_identity Hash.
+ // If a key moves, we would need to reinsert it into the table so it is rehashed.
+    // That is tricky to do, especially as it could trigger allocation which could
+ // trigger GC. Not sure if it is okay to trigger GC while the GC is updating
+ // references.
+ //
+ // NOTE(alan): since we are using Rust data structures that don't interact
+ // with the Ruby GC now, it might be feasible to allow movement.
+
+ let invariants = Invariants::get_instance();
+
+ // Mark CME imemos
+ for cme in invariants.cme_validity.keys() {
+ let cme: VALUE = (*cme).into();
+
+ unsafe { rb_gc_mark(cme) };
+ }
+
+ // Mark class and iclass objects
+ for klass in invariants.method_lookup.keys() {
+ // TODO: This is a leak. Unused blocks linger in the table forever, preventing the
+ // callee class they speculate on from being collected.
+ // We could do a bespoke weak reference scheme on classes similar to
+ // the interpreter's call cache. See finalizer for T_CLASS and cc_table_free().
+
+ unsafe { rb_gc_mark(*klass) };
+ }
+}
+
+/// Remove all invariant assumptions made by the block by removing the block
+/// as a key in all of the relevant tables.
+pub fn block_assumptions_free(blockref: &BlockRef) {
+ let invariants = Invariants::get_instance();
+
+ {
+ let block = blockref.borrow();
+
+ // For each method lookup dependency
+ for dep in block.iter_cme_deps() {
+ // Remove tracking for cme validity
+ if let Some(blockset) = invariants.cme_validity.get_mut(&dep.callee_cme) {
+ blockset.remove(blockref);
+ }
+
+ // Remove tracking for lookup stability
+ if let Some(id_to_block_set) = invariants.method_lookup.get_mut(&dep.receiver_klass) {
+ let mid = unsafe { (*dep.callee_cme).called_id };
+ if let Some(block_set) = id_to_block_set.get_mut(&mid) {
+ block_set.remove(&blockref);
+ }
+ }
+ }
+ }
+
+ // Remove tracking for basic operators that the given block assumes have
+ // not been redefined.
+ if let Some(bops) = invariants.block_basic_operators.remove(&blockref) {
+ // Remove tracking for the given block from the list of blocks associated
+ // with the given basic operator.
+ for key in &bops {
+ if let Some(blocks) = invariants.basic_operator_blocks.get_mut(key) {
+ blocks.remove(&blockref);
+ }
+ }
+ }
+
+ invariants.single_ractor.remove(&blockref);
+
+ // Remove tracking for constant state for a given ID.
+ if let Some(ids) = invariants.block_constant_states.remove(&blockref) {
+ for id in ids {
+ if let Some(blocks) = invariants.constant_state_blocks.get_mut(&id) {
+ blocks.remove(&blockref);
+ }
+ }
+ }
+}
+
+/// Callback from the opt_setinlinecache instruction in the interpreter.
+/// Invalidate the block for the matching opt_getinlinecache so it can regenerate code
+/// using the new value in the constant cache.
+#[no_mangle]
+pub extern "C" fn rb_yjit_constant_ic_update(iseq: *const rb_iseq_t, ic: IC) {
+ // If YJIT isn't enabled, do nothing
+ if !yjit_enabled_p() {
+ return;
+ }
+
+ if !unsafe { (*(*ic).entry).ic_cref }.is_null() || unsafe { rb_yjit_multi_ractor_p() } {
+ // We can't generate code in these situations, so no need to invalidate.
+ // See gen_opt_getinlinecache.
+ return;
+ }
+
+ with_vm_lock(src_loc!(), || {
+ let code = unsafe { get_iseq_body_iseq_encoded(iseq) };
+ let get_insn_idx = unsafe { (*ic).get_insn_idx };
+
+ // This should come from a running iseq, so direct threading translation
+ // should have been done
+ assert!(unsafe { FL_TEST(iseq.into(), VALUE(ISEQ_TRANSLATED)) } != VALUE(0));
+ assert!(get_insn_idx < unsafe { get_iseq_encoded_size(iseq) });
+
+    // Ensure that the instruction that get_insn_idx points to is in
+    // fact an opt_getinlinecache instruction.
+ assert_eq!(
+ unsafe {
+ let opcode_pc = code.add(get_insn_idx.as_usize());
+ let translated_opcode: VALUE = opcode_pc.read();
+ rb_vm_insn_decode(translated_opcode)
+ },
+ OP_OPT_GETINLINECACHE.try_into().unwrap()
+ );
+
+ // Find the matching opt_getinlinecache and invalidate all the blocks there
+ // RUBY_ASSERT(insn_op_type(BIN(opt_getinlinecache), 1) == TS_IC);
+
+ let ic_pc = unsafe { code.add(get_insn_idx.as_usize() + 2) };
+ let ic_operand: IC = unsafe { ic_pc.read() }.as_mut_ptr();
+
+ if ic == ic_operand {
+ for block in take_version_list(BlockId {
+ iseq,
+ idx: get_insn_idx,
+ }) {
+ invalidate_block_version(&block);
+ incr_counter!(invalidate_constant_ic_fill);
+ }
+ } else {
+ panic!("ic->get_insn_index not set properly");
+ }
+ });
+}
+
+// Invalidate all generated code and patch C method return code to contain
+// logic for firing the c_return TracePoint event. Once rb_vm_barrier()
+// returns, all other ractors are pausing inside RB_VM_LOCK_ENTER(), which
+// means they are inside a C routine. If there is any generated code on the stack,
+// it is waiting for a return from a C routine. For every routine call, we
+// patch in an exit after the body of the containing VM instruction. This makes
+// it so all the invalidated code exits as soon as execution logically reaches
+// the next VM instruction. The interpreter takes care of firing the tracing
+// event if it so happens that the next VM instruction has one attached.
+//
+// The c_return event needs special handling as our codegen never outputs code
+// that contains tracing logic. If we let the normal output code run until the
+// start of the next VM instruction by relying on the patching scheme above, we
+// would fail to fire the c_return event. The interpreter doesn't fire the
+// event at an instruction boundary, so simply exiting to the interpreter isn't
+// enough. To handle it, we patch in the full logic at the return address. See
+// full_cfunc_return().
+//
+// In addition to patching, we prevent future entries into invalidated code by
+// removing all live blocks from their iseq.
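+//
+// Patch sketch: each global invalidation patch rewrites the inline code at
+// `inline_patch_pos` into an unconditional jump to an exit stub in the outlined
+// code block, i.e. `jmp_ptr(cb, patch.outlined_target_pos)` in the loop below.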
+#[no_mangle]
+pub extern "C" fn rb_yjit_tracing_invalidate_all() {
+ if !yjit_enabled_p() {
+ return;
+ }
+
+ use crate::asm::x86_64::jmp_ptr;
+
+ // Stop other ractors since we are going to patch machine code.
+ with_vm_lock(src_loc!(), || {
+ // Make it so all live block versions are no longer valid branch targets
+ unsafe { rb_yjit_for_each_iseq(Some(invalidate_all_blocks_for_tracing)) };
+
+ extern "C" fn invalidate_all_blocks_for_tracing(iseq: IseqPtr) {
+ if let Some(payload) = unsafe { load_iseq_payload(iseq) } {
+ // C comment:
+ // Leaking the blocks for now since we might have situations where
+ // a different ractor is waiting for the VM lock in branch_stub_hit().
+ // If we free the block that ractor can wake up with a dangling block.
+ //
+                // Deviation: since we ref count the blocks now, we might deallocate rather
+                // than leak the block.
+ //
+ // Empty all blocks on the iseq so we don't compile new blocks that jump to the
+ // invalidated region.
+ let blocks = payload.take_all_blocks();
+ for blockref in blocks {
+ block_assumptions_free(&blockref);
+ }
+ }
+
+ // Reset output code entry point
+ unsafe { rb_iseq_reset_jit_func(iseq) };
+ }
+
+ let cb = CodegenGlobals::get_inline_cb();
+
+ // Apply patches
+ let old_pos = cb.get_write_pos();
+ let patches = CodegenGlobals::take_global_inval_patches();
+ for patch in &patches {
+ cb.set_write_ptr(patch.inline_patch_pos);
+ jmp_ptr(cb, patch.outlined_target_pos);
+
+ // FIXME: Can't easily check we actually wrote out the JMP at the moment.
+ // assert!(!cb.has_dropped_bytes(), "patches should have space and jump offsets should fit in JMP rel32");
+ }
+ cb.set_pos(old_pos);
+
+ // Freeze invalidated part of the codepage. We only want to wait for
+ // running instances of the code to exit from now on, so we shouldn't
+ // change the code. There could be other ractors sleeping in
+ // branch_stub_hit(), for example. We could harden this by changing memory
+ // protection on the frozen range.
+ assert!(
+ CodegenGlobals::get_inline_frozen_bytes() <= old_pos,
+ "frozen bytes should increase monotonically"
+ );
+ CodegenGlobals::set_inline_frozen_bytes(old_pos);
+
+ CodegenGlobals::get_outlined_cb()
+ .unwrap()
+ .mark_all_executable();
+ cb.mark_all_executable();
+ });
+}
diff --git a/yjit/src/lib.rs b/yjit/src/lib.rs
new file mode 100644
index 0000000000..b7355f55e3
--- /dev/null
+++ b/yjit/src/lib.rs
@@ -0,0 +1,17 @@
+// Silence dead code warnings until we are done porting YJIT
+#![allow(unused_imports)]
+#![allow(dead_code)]
+#![allow(unused_assignments)]
+#![allow(unused_macros)]
+#![allow(clippy::style)] // We are laid back about style
+
+mod asm;
+mod codegen;
+mod core;
+mod cruby;
+mod disasm;
+mod invariants;
+mod options;
+mod stats;
+mod utils;
+mod yjit;
diff --git a/yjit/src/options.rs b/yjit/src/options.rs
new file mode 100644
index 0000000000..669ac52dbd
--- /dev/null
+++ b/yjit/src/options.rs
@@ -0,0 +1,121 @@
+use std::ffi::CStr;
+
+// Command-line options
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+#[repr(C)]
+pub struct Options {
+ // Size of the executable memory block to allocate in MiB
+ pub exec_mem_size: usize,
+
+ // Number of method calls after which to start generating code
+ // Threshold==1 means compile on first execution
+ pub call_threshold: usize,
+
+ // Generate versions greedily until the limit is hit
+ pub greedy_versioning: bool,
+
+ // Disable the propagation of type information
+ pub no_type_prop: bool,
+
+ // Maximum number of versions per block
+ // 1 means always create generic versions
+ pub max_versions: usize,
+
+ // Capture and print out stats
+ pub gen_stats: bool,
+
+ /// Dump compiled and executed instructions for debugging
+ pub dump_insns: bool,
+
+ /// Verify context objects (debug mode only)
+ pub verify_ctx: bool,
+
+    /// Whether to assume a single global constant state (and therefore
+    /// invalidate code whenever any constant changes) versus tracking
+    /// constant name components (and therefore invalidate code only when a
+    /// matching name component changes)
+ pub global_constant_state: bool,
+}
+
+// Initialize the options to default values
+pub static mut OPTIONS: Options = Options {
+ exec_mem_size: 256,
+ call_threshold: 10,
+ greedy_versioning: false,
+ no_type_prop: false,
+ max_versions: 4,
+ gen_stats: false,
+ dump_insns: false,
+ verify_ctx: false,
+ global_constant_state: false,
+};
+
+/// Macro to get an option value by name
+macro_rules! get_option {
+ // Unsafe is ok here because options are initialized
+ // once before any Ruby code executes
+ ($option_name:ident) => {
+ unsafe { OPTIONS.$option_name }
+ };
+}
+pub(crate) use get_option;
+
+/// Expected to receive what comes after the third dash in "--yjit-*".
+/// An empty string means the user passed only "--yjit". The C code rejects
+/// the exact string "--yjit-".
+pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
+ let c_str: &CStr = unsafe { CStr::from_ptr(str_ptr) };
+ let opt_str: &str = c_str.to_str().ok()?;
+ //println!("{}", opt_str);
+
+ // Split the option name and value strings
+ // Note that some options do not contain an assignment
+ let parts = opt_str.split_once("=");
+ let (opt_name, opt_val) = match parts {
+ Some((before_eq, after_eq)) => (before_eq, after_eq),
+ None => (opt_str, ""),
+ };
+
+ // Match on the option name and value strings
+ match (opt_name, opt_val) {
+ ("", "") => (), // Simply --yjit
+
+ ("exec-mem-size", _) => match opt_val.parse() {
+ Ok(n) => unsafe { OPTIONS.exec_mem_size = n },
+ Err(_) => {
+ return None;
+ }
+ },
+
+ ("call-threshold", _) => match opt_val.parse() {
+ Ok(n) => unsafe { OPTIONS.call_threshold = n },
+ Err(_) => {
+ return None;
+ }
+ },
+
+ ("max-versions", _) => match opt_val.parse() {
+ Ok(n) => unsafe { OPTIONS.max_versions = n },
+ Err(_) => {
+ return None;
+ }
+ },
+
+ ("greedy-versioning", "") => unsafe { OPTIONS.greedy_versioning = true },
+ ("no-type-prop", "") => unsafe { OPTIONS.no_type_prop = true },
+ ("stats", "") => unsafe { OPTIONS.gen_stats = true },
+ ("dump-insns", "") => unsafe { OPTIONS.dump_insns = true },
+ ("verify-ctx", "") => unsafe { OPTIONS.verify_ctx = true },
+ ("global-constant-state", "") => unsafe { OPTIONS.global_constant_state = true },
+
+ // Option name not recognized
+ _ => {
+ return None;
+ }
+ }
+
+ // dbg!(unsafe {OPTIONS});
+
+ // Option successfully parsed
+ return Some(());
+}
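+
+// Usage sketch (assumes options are parsed once before any Ruby code runs and
+// that tests touching the global OPTIONS do not run concurrently):
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::ffi::CString;
+
+    #[test]
+    fn parse_option_handles_known_and_unknown_names() {
+        // "--yjit-call-threshold=42" reaches us as "call-threshold=42"
+        let arg = CString::new("call-threshold=42").unwrap();
+        assert!(parse_option(arg.as_ptr()).is_some());
+        assert_eq!(get_option!(call_threshold), 42);
+
+        // Unrecognized option names are rejected (parse_option returns None)
+        let unknown = CString::new("no-such-option").unwrap();
+        assert!(parse_option(unknown.as_ptr()).is_none());
+    }
+}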
diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs
new file mode 100644
index 0000000000..5e42e4d6f0
--- /dev/null
+++ b/yjit/src/stats.rs
@@ -0,0 +1,271 @@
+//! Everything related to the collection of runtime stats in YJIT
+//! See the stats feature and the --yjit-stats command-line option
+
+use crate::codegen::CodegenGlobals;
+use crate::cruby::*;
+use crate::options::*;
+use crate::yjit::yjit_enabled_p;
+
+// YJIT exit counts for each instruction type
+static mut EXIT_OP_COUNT: [u64; VM_INSTRUCTION_SIZE] = [0; VM_INSTRUCTION_SIZE];
+
+// Macro to declare the stat counters
+macro_rules! make_counters {
+ ($($counter_name:ident,)+) => {
+ // Struct containing the counter values
+ #[derive(Default, Debug)]
+ pub struct Counters { $(pub $counter_name: u64),+ }
+
+ // Global counters instance, initialized to zero
+ pub static mut COUNTERS: Counters = Counters { $($counter_name: 0),+ };
+
+ // Counter names constant
+ const COUNTER_NAMES: &'static [&'static str] = &[ $(stringify!($counter_name)),+ ];
+
+ // Map a counter name string to a counter pointer
+ fn get_counter_ptr(name: &str) -> *mut u64 {
+ match name {
+ $( stringify!($counter_name) => { ptr_to_counter!($counter_name) } ),+
+ _ => panic!()
+ }
+ }
+ }
+}
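+
+// Expansion sketch: `make_counters! { exec_instruction, }` produces roughly
+//
+//     pub struct Counters { pub exec_instruction: u64 }
+//     pub static mut COUNTERS: Counters = Counters { exec_instruction: 0 };
+//     const COUNTER_NAMES: &'static [&'static str] = &["exec_instruction"];
+//     fn get_counter_ptr(name: &str) -> *mut u64 { /* "exec_instruction" => ptr_to_counter!(exec_instruction) */ }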
+
+/// Macro to increment a counter by name
+macro_rules! incr_counter {
+    // Unsafe is ok here because the counters are initialized
+ // once before any Ruby code executes
+ ($counter_name:ident) => {
+ #[allow(unused_unsafe)]
+ {
+ unsafe { COUNTERS.$counter_name += 1 }
+ }
+ };
+}
+pub(crate) use incr_counter;
+
+/// Macro to get a raw pointer to a given counter
+macro_rules! ptr_to_counter {
+ ($counter_name:ident) => {
+ unsafe {
+ let ctr_ptr = std::ptr::addr_of_mut!(COUNTERS.$counter_name);
+ ctr_ptr
+ }
+ };
+}
+pub(crate) use ptr_to_counter;
+
+// Declare all the counters we track
+make_counters! {
+ exec_instruction,
+
+ send_keywords,
+ send_kw_splat,
+ send_args_splat,
+ send_block_arg,
+ send_ivar_set_method,
+ send_zsuper_method,
+ send_undef_method,
+ send_optimized_method,
+ send_optimized_method_send,
+ send_optimized_method_call,
+ send_optimized_method_block_call,
+ send_missing_method,
+ send_bmethod,
+ send_refined_method,
+ send_cfunc_ruby_array_varg,
+ send_cfunc_argc_mismatch,
+ send_cfunc_toomany_args,
+ send_cfunc_tracing,
+ send_cfunc_kwargs,
+ send_attrset_kwargs,
+ send_iseq_tailcall,
+ send_iseq_arity_error,
+ send_iseq_only_keywords,
+ send_iseq_kwargs_req_and_opt_missing,
+ send_iseq_kwargs_mismatch,
+ send_iseq_complex_callee,
+ send_not_implemented_method,
+ send_getter_arity,
+ send_se_cf_overflow,
+ send_se_protected_check_failed,
+
+ traced_cfunc_return,
+
+ invokesuper_me_changed,
+ invokesuper_block,
+
+ leave_se_interrupt,
+ leave_interp_return,
+ leave_start_pc_non_zero,
+
+ getivar_se_self_not_heap,
+ getivar_idx_out_of_range,
+ getivar_megamorphic,
+
+ setivar_se_self_not_heap,
+ setivar_idx_out_of_range,
+ setivar_val_heapobject,
+ setivar_name_not_mapped,
+ setivar_not_object,
+ setivar_frozen,
+
+ oaref_argc_not_one,
+ oaref_arg_not_fixnum,
+
+ opt_getinlinecache_miss,
+
+ binding_allocations,
+ binding_set,
+
+ vm_insns_count,
+ compiled_iseq_count,
+ compiled_block_count,
+ compilation_failure,
+
+ exit_from_branch_stub,
+
+ invalidation_count,
+ invalidate_method_lookup,
+ invalidate_bop_redefined,
+ invalidate_ractor_spawn,
+ invalidate_constant_state_bump,
+ invalidate_constant_ic_fill,
+
+ constant_state_bumps,
+
+ expandarray_splat,
+ expandarray_postarg,
+ expandarray_not_array,
+ expandarray_rhs_too_small,
+
+ gbpp_block_param_modified,
+ gbpp_block_handler_not_iseq,
+}
+
+//===========================================================================
+
+/// Primitive called in yjit.rb
+/// Check if stats generation is enabled
+#[no_mangle]
+pub extern "C" fn rb_yjit_stats_enabled_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
+ #[cfg(feature = "stats")]
+ if get_option!(gen_stats) {
+ return Qtrue;
+ }
+
+ return Qfalse;
+}
+
+/// Primitive called in yjit.rb.
+/// Export all YJIT statistics as a Ruby hash.
+#[no_mangle]
+pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
+ with_vm_lock(src_loc!(), || rb_yjit_gen_stats_dict())
+}
+
+/// Export all YJIT statistics as a Ruby hash.
+fn rb_yjit_gen_stats_dict() -> VALUE {
+ // If YJIT is not enabled, return Qnil
+ if !yjit_enabled_p() {
+ return Qnil;
+ }
+
+ let hash = unsafe { rb_hash_new() };
+
+ // Inline and outlined code size
+ unsafe {
+ // Get the inline and outlined code blocks
+ let cb = CodegenGlobals::get_inline_cb();
+ let ocb = CodegenGlobals::get_outlined_cb();
+
+ // Inline code size
+ let key = rust_str_to_sym("inline_code_size");
+ let value = VALUE::fixnum_from_usize(cb.get_write_pos());
+ rb_hash_aset(hash, key, value);
+
+ // Outlined code size
+ let key = rust_str_to_sym("outlined_code_size");
+ let value = VALUE::fixnum_from_usize(ocb.unwrap().get_write_pos());
+ rb_hash_aset(hash, key, value);
+ }
+
+ // If we're not generating stats, the hash is done
+ if !get_option!(gen_stats) {
+ return hash;
+ }
+
+ // If the stats feature is enabled
+ #[cfg(feature = "stats")]
+ unsafe {
+ // Indicate that the complete set of stats is available
+ rb_hash_aset(hash, rust_str_to_sym("all_stats"), Qtrue);
+
+ // For each counter we track
+ for counter_name in COUNTER_NAMES {
+ // Get the counter value
+ let counter_ptr = get_counter_ptr(counter_name);
+ let counter_val = *counter_ptr;
+
+ // Put counter into hash
+ let key = rust_str_to_sym(counter_name);
+ let value = VALUE::fixnum_from_usize(counter_val as usize);
+ rb_hash_aset(hash, key, value);
+ }
+
+ // For each entry in exit_op_count, add a stats entry with key "exit_INSTRUCTION_NAME"
+        // whose value is the count of side exits for that instruction.
+ for op_idx in 0..VM_INSTRUCTION_SIZE {
+ let op_name = insn_name(op_idx);
+ let key_string = "exit_".to_owned() + &op_name;
+ let key = rust_str_to_sym(&key_string);
+ let value = VALUE::fixnum_from_usize(EXIT_OP_COUNT[op_idx] as usize);
+ rb_hash_aset(hash, key, value);
+ }
+ }
+
+ hash
+}
+
+/// Primitive called in yjit.rb. Zero out all the counters.
+#[no_mangle]
+pub extern "C" fn rb_yjit_reset_stats_bang(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
+ unsafe {
+ EXIT_OP_COUNT = [0; VM_INSTRUCTION_SIZE];
+ COUNTERS = Counters::default();
+ }
+
+ return Qnil;
+}
+
+/// Increment the number of instructions executed by the interpreter
+#[no_mangle]
+pub extern "C" fn rb_yjit_collect_vm_usage_insn() {
+ incr_counter!(vm_insns_count);
+}
+
+#[no_mangle]
+pub extern "C" fn rb_yjit_collect_binding_alloc() {
+ incr_counter!(binding_allocations);
+}
+
+#[no_mangle]
+pub extern "C" fn rb_yjit_collect_binding_set() {
+ incr_counter!(binding_set);
+}
+
+#[no_mangle]
+pub extern "C" fn rb_yjit_count_side_exit_op(exit_pc: *const VALUE) -> *const VALUE {
+ #[cfg(not(test))]
+ unsafe {
+ // Get the opcode from the encoded insn handler at this PC
+ let opcode = rb_vm_insn_addr2opcode((*exit_pc).as_ptr());
+
+ // Increment the exit op count for this opcode
+ EXIT_OP_COUNT[opcode as usize] += 1;
+ };
+
+ // This function must return exit_pc!
+ return exit_pc;
+}
diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs
new file mode 100644
index 0000000000..227e3e5f32
--- /dev/null
+++ b/yjit/src/utils.rs
@@ -0,0 +1,205 @@
+use crate::asm::x86_64::*;
+use crate::asm::*;
+use crate::cruby::*;
+use std::slice;
+
+/// Trait for casting to [usize] that allows you to say `.as_usize()`.
+/// Implementation conditional on the cast preserving the numeric value on
+/// all inputs and being inexpensive.
+///
+/// [usize] is only guaranteed to be at least 16 bits wide, so we can't use
+/// `.into()` to cast a `u32` or a `u64` to a `usize` even though on all
+/// the platforms YJIT supports these two casts are pretty much no-ops.
+/// We could say `as usize` or `.try_into().unwrap()` everywhere
+/// for those casts but they both have undesirable consequences if and when
+/// we decide to support 32-bit platforms. Unfortunately we can't implement
+/// [::core::convert::From] for [usize] since both the trait and the type are
+/// external. Naming the method `into()` also runs into naming conflicts.
+pub(crate) trait IntoUsize {
+ /// Convert to usize. Implementation conditional on width of [usize].
+ fn as_usize(self) -> usize;
+}
+
+#[cfg(target_pointer_width = "64")]
+impl IntoUsize for u64 {
+ fn as_usize(self) -> usize {
+ self as usize
+ }
+}
+
+#[cfg(target_pointer_width = "64")]
+impl IntoUsize for u32 {
+ fn as_usize(self) -> usize {
+ self as usize
+ }
+}
+
+impl IntoUsize for u16 {
+    /// Alias for `.into()`. For convenience so you can use the trait for
+    /// all unsigned types.
+ fn as_usize(self) -> usize {
+ self.into()
+ }
+}
+
+impl IntoUsize for u8 {
+    /// Alias for `.into()`. For convenience so you can use the trait for
+    /// all unsigned types.
+ fn as_usize(self) -> usize {
+ self.into()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ #[test]
+ fn min_max_preserved_after_cast_to_usize() {
+ use crate::utils::IntoUsize;
+
+ let min: usize = u64::MIN.as_usize();
+ assert_eq!(min, u64::MIN.try_into().unwrap());
+ let max: usize = u64::MAX.as_usize();
+ assert_eq!(max, u64::MAX.try_into().unwrap());
+
+ let min: usize = u32::MIN.as_usize();
+ assert_eq!(min, u32::MIN.try_into().unwrap());
+ let max: usize = u32::MAX.as_usize();
+ assert_eq!(max, u32::MAX.try_into().unwrap());
+ }
+}
+
+// TODO: we may want to move this function into yjit.c, maybe add a convenient Rust-side wrapper
+/*
+// For debugging. Print the bytecode for an iseq.
+RBIMPL_ATTR_MAYBE_UNUSED()
+static void
+yjit_print_iseq(const rb_iseq_t *iseq)
+{
+ char *ptr;
+ long len;
+ VALUE disassembly = rb_iseq_disasm(iseq);
+ RSTRING_GETMEM(disassembly, ptr, len);
+ fprintf(stderr, "%.*s\n", (int)len, ptr);
+}
+*/
+
+// Save caller-save registers on the stack before a C call
+fn push_regs(cb: &mut CodeBlock) {
+ push(cb, RAX);
+ push(cb, RCX);
+ push(cb, RDX);
+ push(cb, RSI);
+ push(cb, RDI);
+ push(cb, R8);
+ push(cb, R9);
+ push(cb, R10);
+ push(cb, R11);
+ pushfq(cb);
+}
+
+// Restore caller-save registers from the stack after a C call
+fn pop_regs(cb: &mut CodeBlock) {
+ popfq(cb);
+ pop(cb, R11);
+ pop(cb, R10);
+ pop(cb, R9);
+ pop(cb, R8);
+ pop(cb, RDI);
+ pop(cb, RSI);
+ pop(cb, RDX);
+ pop(cb, RCX);
+ pop(cb, RAX);
+}
+
+pub fn print_int(cb: &mut CodeBlock, opnd: X86Opnd) {
+ extern "sysv64" fn print_int_fn(val: i64) {
+ println!("{}", val);
+ }
+
+ push_regs(cb);
+
+ match opnd {
+ X86Opnd::Mem(_) | X86Opnd::Reg(_) => {
+ // Sign-extend the value if necessary
+ if opnd.num_bits() < 64 {
+ movsx(cb, C_ARG_REGS[0], opnd);
+ } else {
+ mov(cb, C_ARG_REGS[0], opnd);
+ }
+ }
+ X86Opnd::Imm(_) | X86Opnd::UImm(_) => {
+ mov(cb, C_ARG_REGS[0], opnd);
+ }
+ _ => unreachable!(),
+ }
+
+ mov(cb, RAX, const_ptr_opnd(print_int_fn as *const u8));
+ call(cb, RAX);
+ pop_regs(cb);
+}
+
+/// Generate code to print a pointer
+pub fn print_ptr(cb: &mut CodeBlock, opnd: X86Opnd) {
+ extern "sysv64" fn print_ptr_fn(ptr: *const u8) {
+ println!("{:p}", ptr);
+ }
+
+ assert!(opnd.num_bits() == 64);
+
+ push_regs(cb);
+ mov(cb, C_ARG_REGS[0], opnd);
+ mov(cb, RAX, const_ptr_opnd(print_ptr_fn as *const u8));
+ call(cb, RAX);
+ pop_regs(cb);
+}
+
+/// Generate code to print a value
+pub fn print_value(cb: &mut CodeBlock, opnd: X86Opnd) {
+ extern "sysv64" fn print_value_fn(val: VALUE) {
+ unsafe { rb_obj_info_dump(val) }
+ }
+
+ assert!(opnd.num_bits() == 64);
+
+ push_regs(cb);
+
+ mov(cb, RDI, opnd);
+ mov(cb, RAX, const_ptr_opnd(print_value_fn as *const u8));
+ call(cb, RAX);
+
+ pop_regs(cb);
+}
+
+// Generate code to print constant string to stdout
+pub fn print_str(cb: &mut CodeBlock, str: &str) {
+ extern "sysv64" fn print_str_cfun(ptr: *const u8, num_bytes: usize) {
+ unsafe {
+ let slice = slice::from_raw_parts(ptr, num_bytes);
+ let str = std::str::from_utf8(slice).unwrap();
+ println!("{}", str);
+ }
+ }
+
+ let bytes = str.as_ptr();
+ let num_bytes = str.len();
+
+ push_regs(cb);
+
+ // Load the string address and jump over the string data
+ lea(cb, C_ARG_REGS[0], mem_opnd(8, RIP, 5));
+ jmp32(cb, num_bytes as i32);
+
+    // Write the string bytes inline in the code stream
+ for i in 0..num_bytes {
+ cb.write_byte(unsafe { *bytes.add(i) });
+ }
+
+ // Pass the string length as an argument
+ mov(cb, C_ARG_REGS[1], uimm_opnd(num_bytes as u64));
+
+ // Call the print function
+ mov(cb, RAX, const_ptr_opnd(print_str_cfun as *const u8));
+ call(cb, RAX);
+
+ pop_regs(cb);
+}
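+
+// Emitted code sketch for `print_str(cb, "hi")`, assuming C_ARG_REGS follows the
+// SysV order (rdi, rsi, ...); the string bytes are embedded in the code stream
+// and the whole sequence is wrapped in push_regs()/pop_regs():
+//
+//   lea rdi, [rip + 5]   ; address of the inline bytes (5 = size of the jmp below)
+//   jmp +2               ; skip over the 2 string bytes
+//   db 'h', 'i'          ; string data written with write_byte()
+//   mov rsi, 2           ; string length argument
+//   mov rax, print_str_cfun
+//   call rax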
diff --git a/yjit/src/yjit.rs b/yjit/src/yjit.rs
new file mode 100644
index 0000000000..24a6b426bf
--- /dev/null
+++ b/yjit/src/yjit.rs
@@ -0,0 +1,98 @@
+use crate::codegen::*;
+use crate::core::*;
+use crate::cruby::*;
+use crate::invariants::*;
+use crate::options::*;
+
+use std::os::raw;
+use std::sync::atomic::{AtomicBool, Ordering};
+
+/// For tracking whether the user enabled YJIT through command line arguments or environment
+/// variables. AtomicBool to avoid `unsafe`. On x86 it compiles to simple movs.
+/// See <https://doc.rust-lang.org/std/sync/atomic/enum.Ordering.html>
+/// See [rb_yjit_enabled_p]
+static YJIT_ENABLED: AtomicBool = AtomicBool::new(false);
+
+/// Parse one command-line option.
+/// This is called from ruby.c
+#[no_mangle]
+pub extern "C" fn rb_yjit_parse_option(str_ptr: *const raw::c_char) -> bool {
+ return parse_option(str_ptr).is_some();
+}
+
+/// Is YJIT on? The interpreter uses this function to decide whether to increment
+/// ISEQ call counters. See mjit_exec().
+/// This is called frequently since it runs on every method call in the interpreter.
+#[no_mangle]
+pub extern "C" fn rb_yjit_enabled_p() -> raw::c_int {
+    // Note that we might want to call this function from signal handlers, so
+    // we might need to ensure signal-safety(7).
+ YJIT_ENABLED.load(Ordering::Acquire).into()
+}
+
+/// Like rb_yjit_enabled_p, but for Rust code.
+pub fn yjit_enabled_p() -> bool {
+ YJIT_ENABLED.load(Ordering::Acquire)
+}
+
+/// After how many calls YJIT starts compiling a method
+#[no_mangle]
+pub extern "C" fn rb_yjit_call_threshold() -> raw::c_uint {
+ get_option!(call_threshold) as raw::c_uint
+}
+
+/// This function is called from C code
+#[no_mangle]
+pub extern "C" fn rb_yjit_init_rust() {
+ // TODO: need to make sure that command-line options have been
+ // initialized by CRuby
+
+ // Catch panics to avoid UB for unwinding into C frames.
+ // See https://doc.rust-lang.org/nomicon/exception-safety.html
+    // TODO: set a panic handler so that we don't print a message
+    // every time we panic.
+ let result = std::panic::catch_unwind(|| {
+ Invariants::init();
+ CodegenGlobals::init();
+
+ // YJIT enabled and initialized successfully
+ YJIT_ENABLED.store(true, Ordering::Release);
+ });
+
+ if let Err(_) = result {
+ println!("YJIT: rb_yjit_init_rust() panicked. Aborting.");
+ std::process::abort();
+ }
+}
+
+/// Called from C code to begin compiling a function
+/// NOTE: this should be wrapped in RB_VM_LOCK_ENTER(), rb_vm_barrier() on the C side
+#[no_mangle]
+pub extern "C" fn rb_yjit_iseq_gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> *const u8 {
+ let maybe_code_ptr = gen_entry_point(iseq, ec);
+
+ match maybe_code_ptr {
+ Some(ptr) => ptr.raw_ptr(),
+ None => std::ptr::null(),
+ }
+}
+
+/// Simulate a situation where we are out of executable memory
+#[no_mangle]
+pub extern "C" fn rb_yjit_simulate_oom_bang(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
+ // If YJIT is not enabled, do nothing
+ if !yjit_enabled_p() {
+ return Qnil;
+ }
+
+ // Enabled in debug mode only for security
+ #[cfg(debug_assertions)]
+ {
+ let cb = CodegenGlobals::get_inline_cb();
+ let ocb = CodegenGlobals::get_outlined_cb().unwrap();
+ cb.set_pos(cb.get_mem_size() - 1);
+ ocb.set_pos(ocb.get_mem_size() - 1);
+ }
+
+ return Qnil;
+}