120 files changed, 16143 insertions, 13290 deletions
diff --git a/prism/api_pack.c b/prism/api_pack.c
deleted file mode 100644
index 98509ae65c..0000000000
--- a/prism/api_pack.c
+++ /dev/null
@@ -1,276 +0,0 @@
-#include "prism/extension.h"
-
-#ifdef PRISM_EXCLUDE_PACK
-
-void
-Init_prism_pack(void) {}
-
-#else
-
-static VALUE rb_cPrism;
-static VALUE rb_cPrismPack;
-static VALUE rb_cPrismPackDirective;
-static VALUE rb_cPrismPackFormat;
-
-static VALUE v3_2_0_symbol;
-static VALUE pack_symbol;
-static VALUE unpack_symbol;
-
-#if SIZEOF_UINT64_T == SIZEOF_LONG_LONG
-# define UINT64T2NUM(x) ULL2NUM(x)
-# define NUM2UINT64T(x) (uint64_t)NUM2ULL(x)
-#elif SIZEOF_UINT64_T == SIZEOF_LONG
-# define UINT64T2NUM(x) ULONG2NUM(x)
-# define NUM2UINT64T(x) (uint64_t)NUM2ULONG(x)
-#else
-// error No uint64_t conversion
-#endif
-
-static VALUE
-pack_type_to_symbol(pm_pack_type type) {
-    switch (type) {
-        case PM_PACK_SPACE:
-            return ID2SYM(rb_intern("SPACE"));
-        case PM_PACK_COMMENT:
-            return ID2SYM(rb_intern("COMMENT"));
-        case PM_PACK_INTEGER:
-            return ID2SYM(rb_intern("INTEGER"));
-        case PM_PACK_UTF8:
-            return ID2SYM(rb_intern("UTF8"));
-        case PM_PACK_BER:
-            return ID2SYM(rb_intern("BER"));
-        case PM_PACK_FLOAT:
-            return ID2SYM(rb_intern("FLOAT"));
-        case PM_PACK_STRING_SPACE_PADDED:
-            return ID2SYM(rb_intern("STRING_SPACE_PADDED"));
-        case PM_PACK_STRING_NULL_PADDED:
-            return ID2SYM(rb_intern("STRING_NULL_PADDED"));
-        case PM_PACK_STRING_NULL_TERMINATED:
-            return ID2SYM(rb_intern("STRING_NULL_TERMINATED"));
-        case PM_PACK_STRING_MSB:
-            return ID2SYM(rb_intern("STRING_MSB"));
-        case PM_PACK_STRING_LSB:
-            return ID2SYM(rb_intern("STRING_LSB"));
-        case PM_PACK_STRING_HEX_HIGH:
-            return ID2SYM(rb_intern("STRING_HEX_HIGH"));
-        case PM_PACK_STRING_HEX_LOW:
-            return ID2SYM(rb_intern("STRING_HEX_LOW"));
-        case PM_PACK_STRING_UU:
-            return ID2SYM(rb_intern("STRING_UU"));
-        case PM_PACK_STRING_MIME:
-            return ID2SYM(rb_intern("STRING_MIME"));
-        case PM_PACK_STRING_BASE64:
-            return ID2SYM(rb_intern("STRING_BASE64"));
-        case PM_PACK_STRING_FIXED:
-            return ID2SYM(rb_intern("STRING_FIXED"));
-        case PM_PACK_STRING_POINTER:
-            return ID2SYM(rb_intern("STRING_POINTER"));
-        case PM_PACK_MOVE:
-            return ID2SYM(rb_intern("MOVE"));
-        case PM_PACK_BACK:
-            return ID2SYM(rb_intern("BACK"));
-        case PM_PACK_NULL:
-            return ID2SYM(rb_intern("NULL"));
-        default:
-            return Qnil;
-    }
-}
-
-static VALUE
-pack_signed_to_symbol(pm_pack_signed signed_type) {
-    switch (signed_type) {
-        case PM_PACK_UNSIGNED:
-            return ID2SYM(rb_intern("UNSIGNED"));
-        case PM_PACK_SIGNED:
-            return ID2SYM(rb_intern("SIGNED"));
-        case PM_PACK_SIGNED_NA:
-            return ID2SYM(rb_intern("SIGNED_NA"));
-        default:
-            return Qnil;
-    }
-}
-
-static VALUE
-pack_endian_to_symbol(pm_pack_endian endian) {
-    switch (endian) {
-        case PM_PACK_AGNOSTIC_ENDIAN:
-            return ID2SYM(rb_intern("AGNOSTIC_ENDIAN"));
-        case PM_PACK_LITTLE_ENDIAN:
-            return ID2SYM(rb_intern("LITTLE_ENDIAN"));
-        case PM_PACK_BIG_ENDIAN:
-            return ID2SYM(rb_intern("BIG_ENDIAN"));
-        case PM_PACK_NATIVE_ENDIAN:
-            return ID2SYM(rb_intern("NATIVE_ENDIAN"));
-        case PM_PACK_ENDIAN_NA:
-            return ID2SYM(rb_intern("ENDIAN_NA"));
-        default:
-            return Qnil;
-    }
-}
-
-static VALUE
-pack_size_to_symbol(pm_pack_size size) {
-    switch (size) {
-        case PM_PACK_SIZE_SHORT:
-            return ID2SYM(rb_intern("SIZE_SHORT"));
-        case PM_PACK_SIZE_INT:
-            return ID2SYM(rb_intern("SIZE_INT"));
-        case PM_PACK_SIZE_LONG:
-            return ID2SYM(rb_intern("SIZE_LONG"));
-        case PM_PACK_SIZE_LONG_LONG:
-            return ID2SYM(rb_intern("SIZE_LONG_LONG"));
-        case PM_PACK_SIZE_8:
-            return ID2SYM(rb_intern("SIZE_8"));
-        case PM_PACK_SIZE_16:
-            return ID2SYM(rb_intern("SIZE_16"));
-        case PM_PACK_SIZE_32:
-            return ID2SYM(rb_intern("SIZE_32"));
-        case PM_PACK_SIZE_64:
-            return ID2SYM(rb_intern("SIZE_64"));
-        case PM_PACK_SIZE_P:
-            return ID2SYM(rb_intern("SIZE_P"));
-        case PM_PACK_SIZE_NA:
-            return ID2SYM(rb_intern("SIZE_NA"));
-        default:
-            return Qnil;
-    }
-}
-
-static VALUE
-pack_length_type_to_symbol(pm_pack_length_type length_type) {
-    switch (length_type) {
-        case PM_PACK_LENGTH_FIXED:
-            return ID2SYM(rb_intern("LENGTH_FIXED"));
-        case PM_PACK_LENGTH_MAX:
-            return ID2SYM(rb_intern("LENGTH_MAX"));
-        case PM_PACK_LENGTH_RELATIVE:
-            return ID2SYM(rb_intern("LENGTH_RELATIVE"));
-        case PM_PACK_LENGTH_NA:
-            return ID2SYM(rb_intern("LENGTH_NA"));
-        default:
-            return Qnil;
-    }
-}
-
-static VALUE
-pack_encoding_to_ruby(pm_pack_encoding encoding) {
-    int index;
-    switch (encoding) {
-        case PM_PACK_ENCODING_ASCII_8BIT:
-            index = rb_ascii8bit_encindex();
-            break;
-        case PM_PACK_ENCODING_US_ASCII:
-            index = rb_usascii_encindex();
-            break;
-        case PM_PACK_ENCODING_UTF_8:
-            index = rb_utf8_encindex();
-            break;
-        default:
-            return Qnil;
-    }
-    return rb_enc_from_encoding(rb_enc_from_index(index));
-}
-
-/**
- * call-seq:
- *   Pack::parse(version, variant, source) -> Format
- *
- * Parse the given source and return a format object.
- */
-static VALUE
-pack_parse(VALUE self, VALUE version_symbol, VALUE variant_symbol, VALUE format_string) {
-    if (version_symbol != v3_2_0_symbol) {
-        rb_raise(rb_eArgError, "invalid version");
-    }
-
-    pm_pack_variant variant;
-    if (variant_symbol == pack_symbol) {
-        variant = PM_PACK_VARIANT_PACK;
-    } else if (variant_symbol == unpack_symbol) {
-        variant = PM_PACK_VARIANT_UNPACK;
-    } else {
-        rb_raise(rb_eArgError, "invalid variant");
-    }
-
-    StringValue(format_string);
-
-    const char *format = RSTRING_PTR(format_string);
-    const char *format_end = format + RSTRING_LEN(format_string);
-    pm_pack_encoding encoding = PM_PACK_ENCODING_START;
-
-    VALUE directives_array = rb_ary_new();
-
-    while (format < format_end) {
-        pm_pack_type type;
-        pm_pack_signed signed_type;
-        pm_pack_endian endian;
-        pm_pack_size size;
-        pm_pack_length_type length_type;
-        uint64_t length;
-
-        const char *directive_start = format;
-
-        pm_pack_result parse_result = pm_pack_parse(variant, &format, format_end, &type, &signed_type, &endian,
-                                                    &size, &length_type, &length, &encoding);
-
-        const char *directive_end = format;
-
-        switch (parse_result) {
-            case PM_PACK_OK:
-                break;
-            case PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE:
-                rb_raise(rb_eArgError, "unsupported directive");
-            case PM_PACK_ERROR_UNKNOWN_DIRECTIVE:
-                rb_raise(rb_eArgError, "unsupported directive");
-            case PM_PACK_ERROR_LENGTH_TOO_BIG:
-                rb_raise(rb_eRangeError, "pack length too big");
-            case PM_PACK_ERROR_BANG_NOT_ALLOWED:
-                rb_raise(rb_eRangeError, "bang not allowed");
-            case PM_PACK_ERROR_DOUBLE_ENDIAN:
-                rb_raise(rb_eRangeError, "double endian");
-            default:
-                rb_bug("parse result");
-        }
-
-        if (type == PM_PACK_END) {
-            break;
-        }
-
-        VALUE directive_args[9] = {
-            version_symbol,
-            variant_symbol,
-            rb_usascii_str_new(directive_start, directive_end - directive_start),
-            pack_type_to_symbol(type),
-            pack_signed_to_symbol(signed_type),
-            pack_endian_to_symbol(endian),
-            pack_size_to_symbol(size),
-            pack_length_type_to_symbol(length_type),
-            UINT64T2NUM(length)
-        };
-
-        rb_ary_push(directives_array, rb_class_new_instance(9, directive_args, rb_cPrismPackDirective));
-    }
-
-    VALUE format_args[2];
-    format_args[0] = directives_array;
-    format_args[1] = pack_encoding_to_ruby(encoding);
-    return rb_class_new_instance(2, format_args, rb_cPrismPackFormat);
-}
-
-/**
- * The function that gets called when Ruby initializes the prism extension.
- */
-void
-Init_prism_pack(void) {
-    rb_cPrism = rb_define_module("Prism");
-    rb_cPrismPack = rb_define_module_under(rb_cPrism, "Pack");
-    rb_cPrismPackDirective = rb_define_class_under(rb_cPrismPack, "Directive", rb_cObject);
-    rb_cPrismPackFormat = rb_define_class_under(rb_cPrismPack, "Format", rb_cObject);
-    rb_define_singleton_method(rb_cPrismPack, "parse", pack_parse, 3);
-
-    v3_2_0_symbol = ID2SYM(rb_intern("v3_2_0"));
-    pack_symbol = ID2SYM(rb_intern("pack"));
-    unpack_symbol = ID2SYM(rb_intern("unpack"));
-}
-
-#endif
diff --git a/prism/arena.c b/prism/arena.c
new file mode 100644
index 0000000000..64a731649d
--- /dev/null
+++ b/prism/arena.c
@@ -0,0 +1,117 @@
+#include "prism/internal/arena.h"
+
+#include "prism/internal/allocator.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/**
+ * Compute the block allocation size using offsetof so it is correct regardless
+ * of PM_FLEX_ARRAY_LENGTH.
+ */
+#define PM_ARENA_BLOCK_SIZE(data_size) (offsetof(pm_arena_block_t, data) + (data_size))
+
+/** Initial block data size: 8 KB. */
+#define PM_ARENA_INITIAL_SIZE 8192
+
+/** Double the block size every this many blocks. */
+#define PM_ARENA_GROWTH_INTERVAL 8
+
+/** Maximum block data size: 1 MB. */
+#define PM_ARENA_MAX_SIZE (1024 * 1024)
+
+/**
+ * Compute the next block's data size: capped doubling, but at least min_size.
+ */
+static size_t
+pm_arena_next_block_size(const pm_arena_t *arena, size_t min_size) {
+    size_t size = PM_ARENA_INITIAL_SIZE;
+
+    for (size_t exp = PM_ARENA_GROWTH_INTERVAL; exp <= arena->block_count; exp += PM_ARENA_GROWTH_INTERVAL) {
+        if (size < PM_ARENA_MAX_SIZE) size *= 2;
+    }
+
+    return size > min_size ? size : min_size;
+}
+
+/**
+ * Allocate a new block with the given data capacity and initial usage, link it
+ * into the arena, and return it. Aborts on allocation failure.
+ */
+static pm_arena_block_t *
+pm_arena_block_new(pm_arena_t *arena, size_t data_size, size_t initial_used) {
+    assert(initial_used <= data_size);
+    pm_arena_block_t *block = (pm_arena_block_t *) xmalloc(PM_ARENA_BLOCK_SIZE(data_size));
+
+    if (block == NULL) {
+        fprintf(stderr, "prism: out of memory; aborting\n");
+        abort();
+    }
+
+    block->capacity = data_size;
+    block->used = initial_used;
+    block->prev = arena->current;
+    arena->current = block;
+    arena->block_count++;
+
+    return block;
+}
+
+/**
+ * Ensure the current block has at least `capacity` bytes free, allocating a
+ * new block of exactly that capacity if necessary, so callers can pre-size the
+ * arena. Requests of at most PM_ARENA_INITIAL_SIZE bytes are a no-op.
+ */
+void
+pm_arena_reserve(pm_arena_t *arena, size_t capacity) {
+    if (capacity <= PM_ARENA_INITIAL_SIZE) return;
+    if (arena->current != NULL && (arena->current->capacity - arena->current->used) >= capacity) return;
+    pm_arena_block_new(arena, capacity, 0);
+}
+
+/**
+ * Slow path for pm_arena_alloc: allocate a new block and return a pointer to
+ * the first `size` bytes. Called when the current block has insufficient space.
+ */
+void *
+pm_arena_alloc_slow(pm_arena_t *arena, size_t size) {
+    size_t block_data_size = pm_arena_next_block_size(arena, size);
+    pm_arena_block_t *block = pm_arena_block_new(arena, block_data_size, size);
+    return block->data;
+}
+
+/**
+ * Returns a newly allocated and initialized arena. Aborts if allocation fails.
+ */
+pm_arena_t *
+pm_arena_new(void) {
+    pm_arena_t *arena = (pm_arena_t *) xcalloc(1, sizeof(pm_arena_t));
+    if (arena == NULL) abort();
+    return arena;
+}
+
+/**
+ * Free all blocks in the arena and reset the arena struct to all zeroes.
+ */
+void
+pm_arena_cleanup(pm_arena_t *arena) {
+    pm_arena_block_t *block = arena->current;
+
+    while (block != NULL) {
+        pm_arena_block_t *prev = block->prev; // read the link before the block is freed
+        xfree_sized(block, PM_ARENA_BLOCK_SIZE(block->capacity));
+        block = prev;
+    }
+
+    *arena = (pm_arena_t) { 0 };
+}
+
+/**
+ * Frees both the held memory and the arena itself.
+ */
+void
+pm_arena_free(pm_arena_t *arena) {
+    pm_arena_cleanup(arena);
+    xfree_sized(arena, sizeof(pm_arena_t));
+}
diff --git a/prism/arena.h b/prism/arena.h
new file mode 100644
index 0000000000..e1fa8fc6ad
--- /dev/null
+++ b/prism/arena.h
@@ -0,0 +1,37 @@
+/**
+ * @file arena.h
+ *
+ * A bump allocator for the prism parser.
+ */
+#ifndef PRISM_ARENA_H
+#define PRISM_ARENA_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nodiscard.h"
+#include "prism/compiler/nonnull.h"
+
+#include <stddef.h>
+
+/**
+ * An opaque pointer to an arena that is used for allocations.
+ */
+typedef struct pm_arena_t pm_arena_t;
+
+/**
+ * Returns a newly allocated and initialized arena. If the arena cannot be
+ * allocated, this function aborts the process.
+ *
+ * @returns A pointer to the newly allocated arena. It is the responsibility of
+ *     the caller to free the arena using pm_arena_free when it is no longer
+ *     needed.
+ */
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_arena_t * pm_arena_new(void);
+
+/**
+ * Frees both the held memory and the arena itself.
+ *
+ * @param arena The arena to free.
+ */
+PRISM_EXPORTED_FUNCTION void pm_arena_free(pm_arena_t *arena) PRISM_NONNULL(1);
+
+#endif
diff --git a/prism/util/pm_buffer.c b/prism/buffer.c
index 2136a7c43e..cb3b9a4fe8 100644
--- a/prism/util/pm_buffer.c
+++ b/prism/buffer.c
@@ -1,31 +1,38 @@
-#include "prism/util/pm_buffer.h"
+#include "prism/internal/buffer.h"
 
-/**
- * Return the size of the pm_buffer_t struct.
- */
-size_t
-pm_buffer_sizeof(void) {
-    return sizeof(pm_buffer_t);
-}
+#include "prism/compiler/inline.h"
+
+#include "prism/internal/char.h"
+#include "prism/internal/allocator.h"
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 
 /**
  * Initialize a pm_buffer_t with the given capacity.
  */
-bool
-pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity) {
+void
+pm_buffer_init(pm_buffer_t *buffer, size_t capacity) {
     buffer->length = 0;
     buffer->capacity = capacity;
 
     buffer->value = (char *) xmalloc(capacity);
-    return buffer->value != NULL;
+    if (buffer->value == NULL && capacity > 0) abort(); // a zero-byte allocation may legitimately yield NULL
 }
 
 /**
- * Initialize a pm_buffer_t with its default values.
+ * Allocate and initialize a new buffer.
  */
-bool
-pm_buffer_init(pm_buffer_t *buffer) {
-    return pm_buffer_init_capacity(buffer, 1024);
+pm_buffer_t *
+pm_buffer_new(void) {
+    pm_buffer_t *buffer = (pm_buffer_t *) xmalloc(sizeof(pm_buffer_t));
+    if (buffer == NULL) abort();
+
+    pm_buffer_init(buffer, 1024);
+    return buffer;
 }
 
 /**
@@ -47,9 +54,10 @@ pm_buffer_length(const pm_buffer_t *buffer) {
 /**
  * Append the given amount of space to the buffer.
  */
-static inline bool
+static PRISM_INLINE bool
 pm_buffer_append_length(pm_buffer_t *buffer, size_t length) {
     size_t next_length = buffer->length + length;
+    const size_t original_capacity = buffer->capacity;
 
     if (next_length > buffer->capacity) {
         if (buffer->capacity == 0) {
@@ -60,7 +68,7 @@ pm_buffer_append_length(pm_buffer_t *buffer, size_t length) {
             buffer->capacity *= 2;
         }
 
-        buffer->value = xrealloc(buffer->value, buffer->capacity);
+        buffer->value = xrealloc_sized(buffer->value, buffer->capacity, original_capacity);
         if (buffer->value == NULL) return false;
     }
 
@@ -71,7 +79,7 @@ pm_buffer_append_length(pm_buffer_t *buffer, size_t length) {
 /**
  * Append a generic pointer to memory to the buffer.
  */
-static inline void
+static PRISM_INLINE void
 pm_buffer_append(pm_buffer_t *buffer, const void *source, size_t length) {
     size_t cursor = buffer->length;
     if (pm_buffer_append_length(buffer, length)) {
@@ -349,9 +357,18 @@ pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t le
 }
 
 /**
- * Free the memory associated with the buffer.
+ * Free the memory held by the buffer.
+ */
+void
+pm_buffer_cleanup(pm_buffer_t *buffer) {
+    xfree_sized(buffer->value, buffer->capacity);
+}
+
+/**
+ * Free both the memory held by the buffer and the buffer itself.
  */
 void
 pm_buffer_free(pm_buffer_t *buffer) {
-    xfree(buffer->value);
+    pm_buffer_cleanup(buffer);
+    xfree_sized(buffer, sizeof(pm_buffer_t));
 }
diff --git a/prism/buffer.h b/prism/buffer.h
new file mode 100644
index 0000000000..24b572d2c3
--- /dev/null
+++ b/prism/buffer.h
@@ -0,0 +1,52 @@
+/**
+ * @file buffer.h
+ *
+ * A wrapper around a contiguous block of allocated memory.
+ */
+#ifndef PRISM_BUFFER_H
+#define PRISM_BUFFER_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nodiscard.h"
+#include "prism/compiler/nonnull.h"
+
+#include <stddef.h>
+
+/**
+ * A wrapper around a contiguous block of allocated memory.
+ */
+typedef struct pm_buffer_t pm_buffer_t;
+
+/**
+ * Allocate and initialize a new buffer. If the buffer cannot be allocated, this
+ * function will abort the process.
+ *
+ * @returns A pointer to the initialized buffer. The caller is responsible for
+ *     freeing the buffer with pm_buffer_free.
+ */
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_buffer_t * pm_buffer_new(void);
+
+/**
+ * Free both the memory held by the buffer and the buffer itself.
+ *
+ * @param buffer The buffer to free.
+ */
+PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer) PRISM_NONNULL(1);
+
+/**
+ * Return the value of the buffer.
+ *
+ * @param buffer The buffer to get the value of.
+ * @returns The value of the buffer.
+ */
+PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer) PRISM_NONNULL(1);
+
+/**
+ * Return the length of the buffer.
+ *
+ * @param buffer The buffer to get the length of.
+ * @returns The length of the buffer.
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(const pm_buffer_t *buffer) PRISM_NONNULL(1);
+
+#endif
diff --git a/prism/util/pm_char.c b/prism/char.c
index a51dc11645..08e457aa1f 100644
--- a/prism/util/pm_char.c
+++ b/prism/char.c
@@ -1,7 +1,8 @@
-#include "prism/util/pm_char.h"
+#include "prism/internal/char.h"
+
+#include "prism/compiler/inline.h"
+#include "prism/internal/line_offset_list.h"
 
-#define PRISM_CHAR_BIT_WHITESPACE (1 << 0)
-#define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1)
 #define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2)
 
 #define PRISM_NUMBER_BIT_BINARY_DIGIT (1 << 0)
@@ -13,7 +14,7 @@
 #define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
 #define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)
 
-static const uint8_t pm_byte_table[256] = {
+const uint8_t pm_byte_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
     0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 0, 0, // 0x
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -57,7 +58,7 @@ static const uint8_t pm_number_table[256] = {
  * Returns the number of characters at the start of the string that match the
  * given kind. Disallows searching past the given maximum number of characters.
  */
-static inline size_t
+static PRISM_INLINE size_t
 pm_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
     if (length <= 0) return 0;
 
@@ -83,15 +84,15 @@ pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
  * searching past the given maximum number of characters.
  */
 size_t
-pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list) {
+pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_arena_t *arena, pm_line_offset_list_t *line_offsets, uint32_t start_offset) {
     if (length <= 0) return 0;
 
-    size_t size = 0;
-    size_t maximum = (size_t) length;
+    uint32_t size = 0;
+    uint32_t maximum = (uint32_t) length;
 
     while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_WHITESPACE)) {
         if (string[size] == '\n') {
-            pm_newline_list_append(newline_list, string + size);
+            pm_line_offset_list_append(arena, line_offsets, start_offset + size + 1);
         }
 
         size++;
@@ -101,15 +102,6 @@ pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newlin
 }
 
 /**
- * Returns the number of characters at the start of the string that are inline
- * whitespace. Disallows searching past the given maximum number of characters.
- */
-size_t
-pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
-    return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_INLINE_WHITESPACE);
-}
-
-/**
  * Returns the number of characters at the start of the string that are regexp
  * options. Disallows searching past the given maximum number of characters.
  */
@@ -118,36 +110,13 @@ pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
     return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_REGEXP_OPTION);
 }
 
-/**
- * Returns true if the given character matches the given kind.
- */
-static inline bool
-pm_char_is_char_kind(const uint8_t b, uint8_t kind) {
-    return (pm_byte_table[b] & kind) != 0;
-}
-
-/**
- * Returns true if the given character is a whitespace character.
- */
-bool
-pm_char_is_whitespace(const uint8_t b) {
-    return pm_char_is_char_kind(b, PRISM_CHAR_BIT_WHITESPACE);
-}
-
-/**
- * Returns true if the given character is an inline whitespace character.
- */
-bool
-pm_char_is_inline_whitespace(const uint8_t b) {
-    return pm_char_is_char_kind(b, PRISM_CHAR_BIT_INLINE_WHITESPACE);
-}
 
 /**
  * Scan through the string and return the number of characters at the start of
  * the string that match the given kind. Disallows searching past the given
  * maximum number of characters.
  */
-static inline size_t
+static PRISM_INLINE size_t
 pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
     if (length <= 0) return 0;
 
@@ -166,7 +135,7 @@ pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
  * Additionally, report the location of the last invalid underscore character
  * found in the string through the out invalid parameter.
  */
-static inline size_t
+static PRISM_INLINE size_t
 pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
     if (length <= 0) return 0;
 
@@ -267,7 +236,7 @@ pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint
 /**
  * Returns true if the given character matches the given kind.
  */
-static inline bool
+static PRISM_INLINE bool
 pm_char_is_number_kind(const uint8_t b, uint8_t kind) {
     return (pm_number_table[b] & kind) != 0;
 }
@@ -303,16 +272,3 @@ bool
 pm_char_is_hexadecimal_digit(const uint8_t b) {
     return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
 }
-
-#undef PRISM_CHAR_BIT_WHITESPACE
-#undef PRISM_CHAR_BIT_INLINE_WHITESPACE
-#undef PRISM_CHAR_BIT_REGEXP_OPTION
-
-#undef PRISM_NUMBER_BIT_BINARY_DIGIT
-#undef PRISM_NUMBER_BIT_BINARY_NUMBER
-#undef PRISM_NUMBER_BIT_OCTAL_DIGIT
-#undef PRISM_NUMBER_BIT_OCTAL_NUMBER
-#undef PRISM_NUMBER_BIT_DECIMAL_DIGIT
-#undef PRISM_NUMBER_BIT_DECIMAL_NUMBER
-#undef PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER
-#undef PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT
diff --git a/prism/comments.h b/prism/comments.h
new file mode 100644
index 0000000000..2270d53889
--- /dev/null
+++ b/prism/comments.h
@@ -0,0 +1,43 @@
+/**
+ * @file comments.h
+ *
+ * Types and functions related to comments found during parsing.
+ */
+#ifndef PRISM_COMMENTS_H
+#define PRISM_COMMENTS_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nodiscard.h"
+#include "prism/compiler/nonnull.h"
+
+#include "prism/ast.h"
+
+#include <stddef.h>
+
+/** This is the type of a comment that we've found while parsing. */
+typedef enum {
+    PM_COMMENT_INLINE,
+    PM_COMMENT_EMBDOC
+} pm_comment_type_t;
+
+/** An opaque pointer to a comment found while parsing. */
+typedef struct pm_comment_t pm_comment_t;
+
+/**
+ * Returns the location associated with the given comment.
+ *
+ * @param comment the comment whose location we want to get
+ * @returns the location associated with the given comment
+ */
+PRISM_EXPORTED_FUNCTION pm_location_t pm_comment_location(const pm_comment_t *comment) PRISM_NONNULL(1);
+
+/**
+ * Returns the type associated with the given comment.
+ *
+ * @param comment the comment whose type we want to get
+ * @returns the type associated with the given comment. This can either be
+ *     PM_COMMENT_INLINE or PM_COMMENT_EMBDOC.
+ */
+PRISM_EXPORTED_FUNCTION pm_comment_type_t pm_comment_type(const pm_comment_t *comment) PRISM_NONNULL(1);
+
+#endif
diff --git a/prism/compiler/accel.h b/prism/compiler/accel.h
new file mode 100644
index 0000000000..be23236d1d
--- /dev/null
+++ b/prism/compiler/accel.h
@@ -0,0 +1,19 @@
+/**
+ * @file compiler/accel.h
+ */
+#ifndef PRISM_COMPILER_ACCEL_H
+#define PRISM_COMPILER_ACCEL_H
+
+/**
+ * Platform detection for SIMD/fast-path implementations. At most one of these
+ * macros is defined, selecting the best available vectorization strategy.
+ */
+#if (defined(__aarch64__) && defined(__ARM_NEON)) || (defined(_MSC_VER) && defined(_M_ARM64))
+#   define PRISM_HAS_NEON
+#elif (defined(__x86_64__) && defined(__SSSE3__)) || (defined(_MSC_VER) && defined(_M_X64))
+#   define PRISM_HAS_SSSE3
+#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#   define PRISM_HAS_SWAR
+#endif
+
+#endif
diff --git a/prism/compiler/align.h b/prism/compiler/align.h
new file mode 100644
index 0000000000..22cb49a48c
--- /dev/null
+++ b/prism/compiler/align.h
@@ -0,0 +1,36 @@
+/**
+ * @file compiler/align.h
+ */
+#ifndef PRISM_COMPILER_ALIGN_H
+#define PRISM_COMPILER_ALIGN_H
+
+/**
+ * Compiler-agnostic macros for specifying alignment of types and variables.
+ */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L /* C11 or later */
+    /** Specify alignment for a type or variable. */
+    #define PRISM_ALIGNAS _Alignas
+
+    /** Get the alignment requirement of a type. */
+    #define PRISM_ALIGNOF _Alignof
+#elif defined(__GNUC__) || defined(__clang__)
+    /** Specify alignment for a type or variable. */
+    #define PRISM_ALIGNAS(size) __attribute__((aligned(size)))
+
+    /** Get the alignment requirement of a type. */
+    #define PRISM_ALIGNOF(type) __alignof__(type)
+#elif defined(_MSC_VER)
+    /** Specify alignment for a type or variable. */
+    #define PRISM_ALIGNAS(size) __declspec(align(size))
+
+    /** Get the alignment requirement of a type. */
+    #define PRISM_ALIGNOF(type) __alignof(type)
+#else
+    /** Void because this platform does not support specifying alignment. */
+    #define PRISM_ALIGNAS(size)
+
+    /** Fallback to sizeof as alignment requirement of a type. */
+    #define PRISM_ALIGNOF(type) sizeof(type)
+#endif
+
+#endif
diff --git a/prism/compiler/exported.h b/prism/compiler/exported.h
new file mode 100644
index 0000000000..823773ecbb
--- /dev/null
+++ b/prism/compiler/exported.h
@@ -0,0 +1,24 @@
+/**
+ * @file compiler/exported.h
+ */
+#ifndef PRISM_COMPILER_EXPORTED_H
+#define PRISM_COMPILER_EXPORTED_H
+
+/**
+ * By default, we compile with -fvisibility=hidden. When this is enabled, we
+ * need to mark certain functions as being publicly visible. This macro does
+ * that in a compiler-agnostic way.
+ */
+#ifndef PRISM_EXPORTED_FUNCTION
+#   ifdef PRISM_EXPORT_SYMBOLS
+#       ifdef _WIN32
+#          define PRISM_EXPORTED_FUNCTION __declspec(dllexport) extern
+#       else
+#          define PRISM_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern
+#       endif
+#   else
+#       define PRISM_EXPORTED_FUNCTION
+#   endif
+#endif
+
+#endif
diff --git a/prism/compiler/fallthrough.h b/prism/compiler/fallthrough.h
new file mode 100644
index 0000000000..ce1b450e8a
--- /dev/null
+++ b/prism/compiler/fallthrough.h
@@ -0,0 +1,22 @@
+/**
+ * @file compiler/fallthrough.h
+ */
+#ifndef PRISM_COMPILER_FALLTHROUGH_H
+#define PRISM_COMPILER_FALLTHROUGH_H
+
+/**
+ * We use -Wimplicit-fallthrough to guard potentially unintended fall-through
+ * between cases of a switch. Use PRISM_FALLTHROUGH to explicitly annotate cases
+ * where the fallthrough is intentional.
+ */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L /* C23 or later */
+    #define PRISM_FALLTHROUGH [[fallthrough]];
+#elif defined(__GNUC__) || defined(__clang__)
+    #define PRISM_FALLTHROUGH __attribute__((fallthrough));
+#elif defined(_MSC_VER)
+    #define PRISM_FALLTHROUGH __fallthrough;
+#else
+    #define PRISM_FALLTHROUGH
+#endif
+
+#endif
diff --git a/prism/compiler/filesystem.h b/prism/compiler/filesystem.h
new file mode 100644
index 0000000000..f988909db8
--- /dev/null
+++ b/prism/compiler/filesystem.h
@@ -0,0 +1,32 @@
+/**
+ * @file compiler/filesystem.h
+ *
+ * Platform detection for mmap and filesystem support.
+ */
+#ifndef PRISM_COMPILER_FILESYSTEM_H
+#define PRISM_COMPILER_FILESYSTEM_H
+
+/**
+ * In general, libc for embedded systems does not support memory-mapped files.
+ * If the target platform is POSIX or Windows, we can map a file in memory and
+ * read it in a more efficient manner.
+ */
+#ifdef _WIN32
+#   define PRISM_HAS_MMAP
+#else
+#   include <unistd.h>
+#   ifdef _POSIX_MAPPED_FILES
+#       define PRISM_HAS_MMAP
+#   endif
+#endif
+
+/**
+ * If PRISM_HAS_NO_FILESYSTEM is defined, then we want to exclude all filesystem
+ * related code from the library. All filesystem related code should be guarded
+ * by PRISM_HAS_FILESYSTEM.
+ */
+#ifndef PRISM_HAS_NO_FILESYSTEM
+#   define PRISM_HAS_FILESYSTEM
+#endif
+
+#endif
diff --git a/prism/compiler/flex_array.h b/prism/compiler/flex_array.h
new file mode 100644
index 0000000000..7504b5fdd3
--- /dev/null
+++ b/prism/compiler/flex_array.h
@@ -0,0 +1,19 @@
+/**
+ * @file compiler/flex_array.h
+ */
+#ifndef PRISM_COMPILER_FLEX_ARRAY_H
+#define PRISM_COMPILER_FLEX_ARRAY_H
+
+/**
+ * A macro to help define a flexible array member. C99 supports `data[]`, GCC
+ * supports `data[0]` as an extension, and older compilers require `data[1]`.
+ */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
+    #define PM_FLEX_ARRAY_LENGTH   /* data[] */
+#elif defined(__GNUC__) && !defined(__STRICT_ANSI__)
+    #define PM_FLEX_ARRAY_LENGTH 0 /* data[0] */
+#else
+    #define PM_FLEX_ARRAY_LENGTH 1 /* data[1] */
+#endif
+
+#endif
diff --git a/prism/compiler/force_inline.h b/prism/compiler/force_inline.h
new file mode 100644
index 0000000000..e189d592d6
--- /dev/null
+++ b/prism/compiler/force_inline.h
@@ -0,0 +1,21 @@
+/**
+ * @file compiler/force_inline.h
+ */
+#ifndef PRISM_COMPILER_FORCE_INLINE_H
+#define PRISM_COMPILER_FORCE_INLINE_H
+
+#include "prism/compiler/inline.h"
+
+/**
+ * Force a function to be inlined at every call site. Use sparingly — only for
+ * small, hot functions where the compiler's heuristics fail to inline.
+ */
+#if defined(_MSC_VER)
+#   define PRISM_FORCE_INLINE __forceinline
+#elif defined(__GNUC__) || defined(__clang__)
+#   define PRISM_FORCE_INLINE PRISM_INLINE __attribute__((always_inline))
+#else
+#   define PRISM_FORCE_INLINE PRISM_INLINE
+#endif
+
+#endif
diff --git a/prism/compiler/format.h b/prism/compiler/format.h
new file mode 100644
index 0000000000..32f4c3c6d7
--- /dev/null
+++ b/prism/compiler/format.h
@@ -0,0 +1,25 @@
+/**
+ * @file compiler/format.h
+ */
+#ifndef PRISM_COMPILER_FORMAT_H
+#define PRISM_COMPILER_FORMAT_H
+
+/**
+ * Certain compilers support specifying that a function accepts variadic
+ * parameters that look like printf format strings to provide a better developer
+ * experience when someone is using the function. This macro does that in a
+ * compiler-agnostic way.
+ */
+#if defined(__GNUC__)
+#   if defined(__MINGW_PRINTF_FORMAT)
+#       define PRISM_ATTRIBUTE_FORMAT(fmt_idx_, arg_idx_) __attribute__((format(__MINGW_PRINTF_FORMAT, fmt_idx_, arg_idx_)))
+#   else
+#       define PRISM_ATTRIBUTE_FORMAT(fmt_idx_, arg_idx_) __attribute__((format(printf, fmt_idx_, arg_idx_)))
+#   endif
+#elif defined(__clang__)
+#   define PRISM_ATTRIBUTE_FORMAT(fmt_idx_, arg_idx_) __attribute__((__format__(__printf__, fmt_idx_, arg_idx_)))
+#else
+#   define PRISM_ATTRIBUTE_FORMAT(fmt_idx_, arg_idx_)
+#endif
+
+#endif
diff --git a/prism/compiler/inline.h b/prism/compiler/inline.h
new file mode 100644
index 0000000000..856a375691
--- /dev/null
+++ b/prism/compiler/inline.h
@@ -0,0 +1,17 @@
+/**
+ * @file compiler/inline.h
+ */
+#ifndef PRISM_COMPILER_INLINE_H
+#define PRISM_COMPILER_INLINE_H
+
+/**
+ * Old Visual Studio versions do not support the inline keyword, so we need to
+ * define it to be __inline.
+ */
+#if defined(_MSC_VER) && !defined(inline)
+#   define PRISM_INLINE __inline
+#else
+#   define PRISM_INLINE inline
+#endif
+
+#endif
diff --git a/prism/compiler/nodiscard.h b/prism/compiler/nodiscard.h
new file mode 100644
index 0000000000..ccd6c00719
--- /dev/null
+++ b/prism/compiler/nodiscard.h
@@ -0,0 +1,22 @@
+/**
+ * @file compiler/nodiscard.h
+ */
+#ifndef PRISM_COMPILER_NODISCARD_H
+#define PRISM_COMPILER_NODISCARD_H
+
+/**
+ * Mark the return value of a function as important so that the compiler warns
+ * if a caller ignores it. This is useful for functions that return error codes
+ * or allocated resources that must be freed.
+ */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
+#   define PRISM_NODISCARD [[nodiscard]]
+#elif defined(__GNUC__) || defined(__clang__)
+#   define PRISM_NODISCARD __attribute__((__warn_unused_result__))
+#elif defined(_MSC_VER)
+#   define PRISM_NODISCARD _Check_return_
+#else
+#   define PRISM_NODISCARD
+#endif
+
+#endif
diff --git a/prism/compiler/nonnull.h b/prism/compiler/nonnull.h
new file mode 100644
index 0000000000..9d19355665
--- /dev/null
+++ b/prism/compiler/nonnull.h
@@ -0,0 +1,18 @@
+/**
+ * @file compiler/nonnull.h
+ */
+#ifndef PRISM_COMPILER_NONNULL_H
+#define PRISM_COMPILER_NONNULL_H
+
+/**
+ * Mark the parameters of a function as non-null. This allows the compiler to
+ * warn if a caller passes NULL for a parameter that should never be NULL. The
+ * arguments are the 1-based indices of the parameters.
+ */
+#if defined(__GNUC__) || defined(__clang__)
+#   define PRISM_NONNULL(...) __attribute__((__nonnull__(__VA_ARGS__)))
+#else
+#   define PRISM_NONNULL(...)
+#endif
+
+#endif
diff --git a/prism/compiler/unused.h b/prism/compiler/unused.h
new file mode 100644
index 0000000000..6a9e125dde
--- /dev/null
+++ b/prism/compiler/unused.h
@@ -0,0 +1,18 @@
+/**
+ * @file compiler/unused.h
+ */
+#ifndef PRISM_COMPILER_UNUSED_H
+#define PRISM_COMPILER_UNUSED_H
+
+/**
+ * GCC will warn if you specify a function or parameter that is unused at
+ * runtime. This macro allows you to mark a function or parameter as unused in a
+ * compiler-agnostic way.
+ */
+#if defined(__GNUC__)
+#   define PRISM_UNUSED __attribute__((unused))
+#else
+#   define PRISM_UNUSED
+#endif
+
+#endif
diff --git a/prism/config.yml b/prism/config.yml
index 3d5eee190f..bbbc5f3d33 100644
--- a/prism/config.yml
+++ b/prism/config.yml
@@ -17,6 +17,8 @@ errors:
   - ARGUMENT_FORWARDING_UNBOUND
   - ARGUMENT_NO_FORWARDING_AMPERSAND
   - ARGUMENT_NO_FORWARDING_ELLIPSES
+  - ARGUMENT_NO_FORWARDING_ELLIPSES_LAMBDA
+  - ARGUMENT_NO_FORWARDING_ELLIPSES_BLOCK
   - ARGUMENT_NO_FORWARDING_STAR
   - ARGUMENT_NO_FORWARDING_STAR_STAR
   - ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT
@@ -60,7 +62,9 @@ errors:
   - CONDITIONAL_WHILE_PREDICATE
   - CONSTANT_PATH_COLON_COLON_CONSTANT
   - DEF_ENDLESS
+  - DEF_ENDLESS_PARAMETERS
   - DEF_ENDLESS_SETTER
+  - DEF_ENDLESS_DO_BLOCK
   - DEF_NAME
   - DEF_PARAMS_TERM
   - DEF_PARAMS_TERM_PAREN
@@ -101,6 +105,8 @@ errors:
   - EXPECT_FOR_DELIMITER
   - EXPECT_IDENT_REQ_PARAMETER
   - EXPECT_IN_DELIMITER
+  - EXPECT_LPAREN_AFTER_NOT_LPAREN
+  - EXPECT_LPAREN_AFTER_NOT_OTHER
   - EXPECT_LPAREN_REQ_PARAMETER
   - EXPECT_MESSAGE
   - EXPECT_RBRACKET
@@ -216,6 +222,7 @@ errors:
   - PARAMETER_WILD_LOOSE_COMMA
   - PATTERN_ARRAY_MULTIPLE_RESTS
   - PATTERN_CAPTURE_DUPLICATE
+  - PATTERN_CAPTURE_IN_ALTERNATIVE
   - PATTERN_EXPRESSION_AFTER_BRACKET
   - PATTERN_EXPRESSION_AFTER_COMMA
   - PATTERN_EXPRESSION_AFTER_HROCKET
@@ -241,7 +248,9 @@ errors:
   - PATTERN_TERM_PAREN
   - PIPEPIPEEQ_MULTI_ASSIGN
   - REGEXP_ENCODING_OPTION_MISMATCH
+  - REGEXP_ESCAPED_NON_ASCII_IN_UTF8
   - REGEXP_INCOMPAT_CHAR_ENCODING
+  - REGEXP_INVALID_CHAR_PROPERTY
   - REGEXP_INVALID_UNICODE_RANGE
   - REGEXP_NON_ESCAPED_MBC
   - REGEXP_PARSE_ERROR
@@ -277,6 +286,7 @@ errors:
   - UNEXPECTED_INDEX_KEYWORDS
   - UNEXPECTED_LABEL
   - UNEXPECTED_MULTI_WRITE
+  - UNEXPECTED_PARAMETER_DEFAULT_VALUE
   - UNEXPECTED_RANGE_OPERATOR
   - UNEXPECTED_SAFE_NAVIGATION
   - UNEXPECTED_TOKEN_CLOSE_CONTEXT
@@ -320,13 +330,44 @@ warnings:
   - UNUSED_LOCAL_VARIABLE
   - VOID_STATEMENT
 tokens:
+  # The order of the tokens at the beginning is important, because we use them
+  # for a lookup table.
   - name: EOF
     value: 1
     comment: final token in the file
-  - name: MISSING
-    comment: "a token that was expected but not found"
-  - name: NOT_PROVIDED
-    comment: "a token that was not present but it is okay"
+  - name: BRACE_RIGHT
+    comment: "}"
+  - name: COMMA
+    comment: ","
+  - name: EMBEXPR_END
+    comment: "}"
+  - name: KEYWORD_DO
+    comment: "do"
+  - name: KEYWORD_ELSE
+    comment: "else"
+  - name: KEYWORD_ELSIF
+    comment: "elsif"
+  - name: KEYWORD_END
+    comment: "end"
+  - name: KEYWORD_ENSURE
+    comment: "ensure"
+  - name: KEYWORD_IN
+    comment: "in"
+  - name: KEYWORD_RESCUE
+    comment: "rescue"
+  - name: KEYWORD_THEN
+    comment: "then"
+  - name: KEYWORD_WHEN
+    comment: "when"
+  - name: NEWLINE
+    comment: "a newline character outside of other tokens"
+  - name: PARENTHESIS_RIGHT
+    comment: ")"
+  - name: PIPE
+    comment: "|"
+  - name: SEMICOLON
+    comment: ";"
+  # Tokens from here on are not used for lookup, and can be in any order.
   - name: AMPERSAND
     comment: "&"
   - name: AMPERSAND_AMPERSAND
@@ -349,8 +390,6 @@ tokens:
     comment: "!~"
   - name: BRACE_LEFT
     comment: "{"
-  - name: BRACE_RIGHT
-    comment: "}"
   - name: BRACKET_LEFT
     comment: "["
   - name: BRACKET_LEFT_ARRAY
@@ -373,8 +412,6 @@ tokens:
     comment: ":"
   - name: COLON_COLON
     comment: "::"
-  - name: COMMA
-    comment: ","
   - name: COMMENT
     comment: "a comment"
   - name: CONSTANT
@@ -393,8 +430,6 @@ tokens:
     comment: "a line inside of embedded documentation"
   - name: EMBEXPR_BEGIN
     comment: "#{"
-  - name: EMBEXPR_END
-    comment: "}"
   - name: EMBVAR
     comment: "#"
   - name: EQUAL
@@ -461,20 +496,12 @@ tokens:
     comment: "def"
   - name: KEYWORD_DEFINED
     comment: "defined?"
-  - name: KEYWORD_DO
-    comment: "do"
+  - name: KEYWORD_DO_BLOCK
+    comment: "do keyword for a block attached to a command"
   - name: KEYWORD_DO_LOOP
     comment: "do keyword for a predicate in a while, until, or for loop"
-  - name: KEYWORD_ELSE
-    comment: "else"
-  - name: KEYWORD_ELSIF
-    comment: "elsif"
-  - name: KEYWORD_END
-    comment: "end"
   - name: KEYWORD_END_UPCASE
     comment: "END"
-  - name: KEYWORD_ENSURE
-    comment: "ensure"
   - name: KEYWORD_FALSE
     comment: "false"
   - name: KEYWORD_FOR
@@ -483,8 +510,6 @@ tokens:
     comment: "if"
   - name: KEYWORD_IF_MODIFIER
     comment: "if in the modifier form"
-  - name: KEYWORD_IN
-    comment: "in"
   - name: KEYWORD_MODULE
     comment: "module"
   - name: KEYWORD_NEXT
@@ -497,8 +522,6 @@ tokens:
     comment: "or"
   - name: KEYWORD_REDO
     comment: "redo"
-  - name: KEYWORD_RESCUE
-    comment: "rescue"
   - name: KEYWORD_RESCUE_MODIFIER
     comment: "rescue in the modifier form"
   - name: KEYWORD_RETRY
@@ -509,8 +532,6 @@ tokens:
     comment: "self"
   - name: KEYWORD_SUPER
     comment: "super"
-  - name: KEYWORD_THEN
-    comment: "then"
   - name: KEYWORD_TRUE
     comment: "true"
   - name: KEYWORD_UNDEF
@@ -523,8 +544,6 @@ tokens:
     comment: "until"
   - name: KEYWORD_UNTIL_MODIFIER
     comment: "until in the modifier form"
-  - name: KEYWORD_WHEN
-    comment: "when"
   - name: KEYWORD_WHILE
     comment: "while"
   - name: KEYWORD_WHILE_MODIFIER
@@ -561,16 +580,12 @@ tokens:
     comment: "-="
   - name: MINUS_GREATER
     comment: "->"
-  - name: NEWLINE
-    comment: "a newline character outside of other tokens"
   - name: NUMBERED_REFERENCE
     comment: "a numbered reference to a capture group in the previous regular expression match"
   - name: PARENTHESIS_LEFT
     comment: "("
   - name: PARENTHESIS_LEFT_PARENTHESES
     comment: "( for a parentheses node"
-  - name: PARENTHESIS_RIGHT
-    comment: ")"
   - name: PERCENT
     comment: "%"
   - name: PERCENT_EQUAL
@@ -585,8 +600,6 @@ tokens:
     comment: "%I"
   - name: PERCENT_UPPER_W
     comment: "%W"
-  - name: PIPE
-    comment: "|"
   - name: PIPE_EQUAL
     comment: "|="
   - name: PIPE_PIPE
@@ -603,8 +616,6 @@ tokens:
     comment: "the beginning of a regular expression"
   - name: REGEXP_END
     comment: "the end of a regular expression"
-  - name: SEMICOLON
-    comment: ";"
   - name: SLASH
     comment: "/"
   - name: SLASH_EQUAL
@@ -803,8 +814,6 @@ nodes:
           - GlobalVariableReadNode
           - BackReferenceReadNode
           - NumberedReferenceReadNode
-          - on error: SymbolNode # alias $a b
-          - on error: MissingNode # alias $a 42
         comment: |
           Represents the old name of the global variable that can be used before aliasing.
 
@@ -813,7 +822,7 @@ nodes:
       - name: keyword_loc
         type: location
         comment: |
-          The location of the `alias` keyword.
+          The Location of the `alias` keyword.
 
               alias $foo $bar
               ^^^^^
@@ -845,8 +854,6 @@ nodes:
         kind:
           - SymbolNode
           - InterpolatedSymbolNode
-          - on error: GlobalVariableReadNode # alias a $b
-          - on error: MissingNode # alias a 42
         comment: |
           Represents the old name of the method that will be aliased.
 
@@ -861,7 +868,7 @@ nodes:
       - name: keyword_loc
         type: location
         comment: |
-          Represents the location of the `alias` keyword.
+          Represents the Location of the `alias` keyword.
 
               alias foo bar
               ^^^^^
@@ -891,7 +898,7 @@ nodes:
       - name: operator_loc
         type: location
         comment: |
-          Represents the alternation operator location.
+          Represents the alternation operator Location.
 
               foo => bar | baz
                          ^
@@ -927,7 +934,7 @@ nodes:
       - name: operator_loc
         type: location
         comment: |
-          The location of the `and` keyword or the `&&` operator.
+          The Location of the `and` keyword or the `&&` operator.
 
               left and right
                    ^^^
@@ -962,7 +969,7 @@ nodes:
       - name: opening_loc
         type: location?
         comment: |
-          Represents the optional source location for the opening token.
+          Represents the optional source Location for the opening token.
 
               [1,2,3]                 # "["
               %w[foo bar baz]         # "%w["
@@ -971,7 +978,7 @@ nodes:
       - name: closing_loc
         type: location?
         comment: |
-          Represents the optional source location for the closing token.
+          Represents the optional source Location for the closing token.
 
               [1,2,3]                 # "]"
               %w[foo bar baz]         # "]"
@@ -987,8 +994,19 @@ nodes:
       - name: constant
         type: node?
         kind:
-          - ConstantReadNode
           - ConstantPathNode
+          - ConstantReadNode
+        comment: |
+          Represents the optional constant preceding the array pattern.
+
+              foo in Bar[]
+                     ^^^
+
+              foo in Bar[1, 2, 3]
+                     ^^^
+
+              foo in Bar::Baz[1, 2, 3]
+                     ^^^^^^^^
       - name: requireds
         type: node[]
         kind: pattern expression
@@ -999,7 +1017,9 @@ nodes:
                       ^  ^
       - name: rest
         type: node?
-        kind: pattern expression
+        kind:
+          - ImplicitRestNode
+          - SplatNode
         comment: |
           Represents the rest element of the array pattern.
 
@@ -1016,14 +1036,14 @@ nodes:
       - name: opening_loc
         type: location?
         comment: |
-          Represents the opening location of the array pattern.
+          Represents the opening Location of the array pattern.
 
               foo in [1, 2]
                      ^
       - name: closing_loc
         type: location?
         comment: |
-          Represents the closing location of the array pattern.
+          Represents the closing Location of the array pattern.
 
               foo in [1, 2]
                           ^
@@ -1031,19 +1051,19 @@ nodes:
       Represents an array pattern in pattern matching.
 
           foo in 1, 2
-          ^^^^^^^^^^^
+                 ^^^^
 
           foo in [1, 2]
-          ^^^^^^^^^^^^^
+                 ^^^^^^
 
           foo in *bar
-          ^^^^^^^^^^^
+                 ^^^^
 
           foo in Bar[]
-          ^^^^^^^^^^^^
+                 ^^^^^
 
           foo in Bar[1, 2, 3]
-          ^^^^^^^^^^^^^^^^^^^
+                 ^^^^^^^^^^^^
   - name: AssocNode
     fields:
       - name: key
@@ -1074,7 +1094,7 @@ nodes:
       - name: operator_loc
         type: location?
         comment: |
-          The location of the `=>` operator, if present.
+          The Location of the `=>` operator, if present.
 
               { foo => bar }
                     ^^
@@ -1096,7 +1116,7 @@ nodes:
       - name: operator_loc
         type: location
         comment: |
-          The location of the `**` operator.
+          The Location of the `**` operator.
 
               { **x }
                 ^^
@@ -1125,7 +1145,7 @@ nodes:
       - name: begin_keyword_loc
         type: location?
         comment: |
-          Represents the location of the `begin` keyword.
+          Represents the Location of the `begin` keyword.
 
               begin x end
               ^^^^^
@@ -1152,7 +1172,7 @@ nodes:
           Represents the else clause within the begin block.
 
               begin x; rescue y; else z; end
-                                 ^^^^^^
+                                 ^^^^^^^^^^^
       - name: ensure_clause
         type: node?
         kind: EnsureNode
@@ -1164,7 +1184,7 @@ nodes:
       - name: end_keyword_loc
         type: location?
         comment: |
-          Represents the location of the `end` keyword.
+          Represents the Location of the `end` keyword.
 
               begin x end
                       ^^^
@@ -1185,11 +1205,11 @@ nodes:
           The expression that is being passed as a block argument. This can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
 
               foo(&args)
-                  ^^^^^
+                   ^^^^
       - name: operator_loc
         type: location
         comment: |
-          Represents the location of the `&` operator.
+          Represents the Location of the `&` operator.
 
               foo(&args)
                   ^
@@ -1197,7 +1217,7 @@ nodes:
       Represents a block argument using `&`.
 
           bar(&args)
-          ^^^^^^^^^^
+              ^^^^^
   - name: BlockLocalVariableNode
     flags: ParameterFlags
     fields:
@@ -1250,17 +1270,17 @@ nodes:
       - name: opening_loc
         type: location
         comment: |
-          Represents the location of the opening `|`.
+          Represents the Location of the opening `{` or `do`.
 
               [1, 2, 3].each { |i| puts x }
-                               ^
+                             ^
       - name: closing_loc
         type: location
         comment: |
-          Represents the location of the closing `|`.
+          Represents the Location of the closing `}` or `end`.
 
               [1, 2, 3].each { |i| puts x }
-                                 ^
+                                          ^
     comment: |
       Represents a block of ruby code.
 
@@ -1280,14 +1300,14 @@ nodes:
       - name: name_loc
         type: location?
         comment: |
-          Represents the location of the block parameter name.
+          Represents the Location of the block parameter name.
 
               def a(&b)
                      ^
       - name: operator_loc
         type: location
         comment: |
-          Represents the location of the `&` operator.
+          Represents the Location of the `&` operator.
 
               def a(&b)
                     ^
@@ -1327,7 +1347,7 @@ nodes:
       - name: opening_loc
         type: location?
         comment: |
-          Represents the opening location of the block parameters.
+          Represents the opening Location of the block parameters.
 
               -> (a, b = 1; local) { }
                  ^
@@ -1338,7 +1358,7 @@ nodes:
       - name: closing_loc
         type: location?
         comment: |
-          Represents the closing location of the block parameters.
+          Represents the closing Location of the block parameters.
 
               -> (a, b = 1; local) { }
                                  ^
@@ -1368,7 +1388,7 @@ nodes:
       - name: keyword_loc
         type: location
         comment: |
-          The location of the `break` keyword.
+          The Location of the `break` keyword.
 
               break foo
               ^^^^^
@@ -1391,14 +1411,14 @@ nodes:
       - name: call_operator_loc
         type: location?
         comment: |
-          Represents the location of the call operator.
+          Represents the Location of the call operator.
 
               foo.bar &&= value
                  ^
       - name: message_loc
         type: location?
         comment: |
-          Represents the location of the message.
+          Represents the Location of the message.
 
               foo.bar &&= value
                   ^^^
@@ -1419,7 +1439,7 @@ nodes:
       - name: operator_loc
         type: location
         comment: |
-          Represents the location of the operator.
+          Represents the Location of the operator.
 
               foo.bar &&= value
                       ^^^
@@ -1456,7 +1476,7 @@ nodes:
       - name: call_operator_loc
         type: location?
         comment: |
-          Represents the location of the call operator.
+          Represents the Location of the call operator.
 
               foo.bar
                  ^
@@ -1473,14 +1493,15 @@ nodes:
       - name: message_loc
         type: location?
         comment: |
-          Represents the location of the message.
+          Represents the Location of the message.
 
               foo.bar
                   ^^^
       - name: opening_loc
         type: location?
         comment: |
-          Represents the location of the left parenthesis.
+          Represents the Location of the left parenthesis.
+
               foo(bar)
                  ^
       - name: arguments
@@ -1494,10 +1515,20 @@ nodes:
       - name: closing_loc
         type: location?
         comment: |
-          Represents the location of the right parenthesis.
+          Represents the Location of the right parenthesis.
 
               foo(bar)
                      ^
+      - name: equal_loc
+        type: location?
+        comment: |
+          Represents the Location of the equal sign, in the case that this is an attribute write.
+
+              foo.bar = value
+                      ^
+
+              foo[bar] = value
+                       ^
       - name: block
         type: node?
         kind:
@@ -1542,14 +1573,14 @@ nodes:
       - name: call_operator_loc
         type: location?
         comment: |
-          Represents the location of the call operator.
+          Represents the Location of the call operator.
 
               foo.bar += value
                  ^
       - name: message_loc
         type: location?
         comment: |
-          Represents the location of the message.
+          Represents the Location of the message.
 
               foo.bar += value
                   ^^^
@@ -1577,7 +1608,7 @@ nodes:
       - name: binary_operator_loc
         type: location
         comment: |
-          Represents the location of the binary operator.
+          Represents the Location of the binary operator.
 
               foo.bar += value
                       ^^
@@ -1608,14 +1639,14 @@ nodes:
       - name: call_operator_loc
         type: location?
         comment: |
-          Represents the location of the call operator.
+          Represents the Location of the call operator.
 
               foo.bar ||= value
                  ^
       - name: message_loc
         type: location?
         comment: |
-          Represents the location of the message.
+          Represents the Location of the message.
 
               foo.bar ||= value
                   ^^^
@@ -1636,7 +1667,7 @@ nodes:
       - name: operator_loc
         type: location
         comment: |
-          Represents the location of the operator.
+          Represents the Location of the operator.
 
               foo.bar ||= value
                       ^^^
@@ -1667,7 +1698,7 @@ nodes:
       - name: call_operator_loc
         type: location
         comment: |
-          Represents the location of the call operator.
+          Represents the Location of the call operator.
 
               foo.bar = 1
                  ^
@@ -1681,7 +1712,7 @@ nodes:
       - name: message_loc
         type: location
         comment: |
-          Represents the location of the message.
+          Represents the Location of the message.
 
               foo.bar = 1
                   ^^^
@@ -1719,7 +1750,7 @@ nodes:
       - name: operator_loc
         type: location
         comment: |
-          Represents the location of the `=>` operator.
+          Represents the Location of the `=>` operator.
 
               foo => bar
                   ^^
@@ -1727,7 +1758,7 @@ nodes:
       Represents assigning to a local variable in pattern matching.
 
           foo => [bar => baz]
-                 ^^^^^^^^^^^^
+                  ^^^^^^^^^^
   - name: CaseMatchNode
     fields:
       - name: predicate
@@ -1737,7 +1768,7 @@ nodes:
           Represents the predicate of the case match. This can be either `nil` or any [non-void expressions](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
 
               case true; in false; end
-              ^^^^
+                   ^^^^
       - name: conditions
         type: node[]
         kind: InNode
@@ -1753,18 +1784,18 @@ nodes:
           Represents the else clause of the case match.
 
               case true; in false; else; end
-                                   ^^^^
+                                   ^^^^^^^^^
       - name: case_keyword_loc
         type: location
         comment: |
-          Represents the location of the `case` keyword.
+          Represents the Location of the `case` keyword.
 
               case true; in false; end
               ^^^^
       - name: end_keyword_loc
         type: location
         comment: |
-          Represents the location of the `end` keyword.
+          Represents the Location of the `end` keyword.
 
               case true; in false; end
                                    ^^^
@@ -1784,7 +1815,7 @@ nodes:
           Represents the predicate of the case statement. This can be either `nil` or any [non-void expressions](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
 
               case true; when false; end
-              ^^^^
+                   ^^^^
       - name: conditions
         type: node[]
         kind: WhenNode
@@ -1800,18 +1831,18 @@ nodes:
           Represents the else clause of the case statement.
 
               case true; when false; else; end
-                                     ^^^^
+                                     ^^^^^^^^^
       - name: case_keyword_loc
         type: location
         comment: |
-          Represents the location of the `case` keyword.
+          Represents the Location of the `case` keyword.
 
               case true; when false; end
               ^^^^
       - name: end_keyword_loc
         type: location
         comment: |
-          Represents the location of the `end` keyword.
+          Represents the Location of the `end` keyword.
 
               case true; when false; end
                                      ^^^
@@ -1828,26 +1859,54 @@ nodes:
         type: constant[]
       - name: class_keyword_loc
         type: location
+        comment: |
+          Represents the Location of the `class` keyword.
+
+              class Foo end
+              ^^^^^
       - name: constant_path
         type: node
         kind:
           - ConstantReadNode
           - ConstantPathNode
-          - on error: CallNode # class 0.X end
       - name: inheritance_operator_loc
         type: location?
+        comment: |
+          Represents the Location of the `<` operator.
+
+              class Foo < Bar
+                        ^
       - name: superclass
         type: node?
         kind: non-void expression
+        comment: |
+          Represents the superclass of the class.
+
+              class Foo < Bar
+                          ^^^
       - name: body
         type: node?
         kind:
           - StatementsNode
           - BeginNode
+        comment: |
+          Represents the body of the class.
+
+              class Foo; bar; end
+                         ^^^
       - name: end_keyword_loc
         type: location
+        comment: |
+          Represents the Location of the `end` keyword.
+
+              class Foo end
+                        ^^^
       - name: name
         type: constant
+        comment: |
+          The name of the class.
+
+              class Foo end # name `:Foo`
     comment: |
       Represents a class declaration involving the `class` keyword.
 
@@ -1865,14 +1924,14 @@ nodes:
       - name: name_loc
         type: location
         comment: |
-          Represents the location of the variable name.
+          Represents the Location of the variable name.
 
               @@target &&= value
               ^^^^^^^^
       - name: operator_loc
         type: location
         comment: |
-          Represents the location of the `&&=` operator.
+          Represents the Location of the `&&=` operator.
 
               @@target &&= value
                        ^^^
@@ -1960,7 +2019,7 @@ nodes:
       - name: name_loc
         type: location
         comment: |
-          The location of the variable name.
+          The Location of the variable name.
 
               @@foo = :bar
               ^^^^^
@@ -1978,7 +2037,7 @@ nodes:
       - name: operator_loc
         type: location
         comment: |
-          The location of the `=` operator.
+          The Location of the `=` operator.
 
               @@foo = :bar
                     ^
@@ -2074,7 +2133,7 @@ nodes:
       - name: delimiter_loc
         type: location
         comment: |
-          The location of the `::` delimiter.
+          The Location of the `::` delimiter.
 
               ::Foo
               ^^
@@ -2084,7 +2143,7 @@ nodes:
       - name: name_loc
         type: location
         comment: |
-          The location of the name of the constant.
+          The Location of the name of the constant.
 
               ::Foo
                 ^^^
@@ -2160,7 +2219,7 @@ nodes:
       - name: operator_loc
         type: location
         comment: |
-          The location of the `=` operator.
+          The Location of the `=` operator.
 
               ::ABC = 123
                     ^
@@ -2220,7 +2279,7 @@ nodes:
       - name: name_loc
         type: location
         comment: |
-          The location of the constant name.
+          The Location of the constant name.
 
               FOO = 1
               ^^^
@@ -2238,7 +2297,7 @@ nodes:
       - name: operator_loc
         type: location
         comment: |
-          The location of the `=` operator.
+          The Location of the `=` operator.
 
               FOO = :bar
                   ^
@@ -2363,6 +2422,15 @@ nodes:
           ^^^^^^
             bar
           end
+  - name: ErrorRecoveryNode
+    fields:
+      - name: unexpected
+        type: node?
+        kind: Node
+        comment: |
+          The unexpected node that was found in the tree, if there was one.
+    comment: |
+      Represents a node that is either missing or unexpected and results in a syntax error.
   - name: FalseNode
     comment: |
       Represents the use of the literal `false` keyword.
@@ -2374,23 +2442,66 @@ nodes:
       - name: constant
         type: node?
         kind:
-          - ConstantReadNode
           - ConstantPathNode
+          - ConstantReadNode
+        comment: |
+          Represents the optional constant preceding the find pattern.
+
+              foo in Foo(*bar, baz, *qux)
+                     ^^^
       - name: left
         type: node
         kind: SplatNode
+        comment: |
+          Represents the first wildcard node in the pattern.
+
+              foo in *bar, baz, *qux
+                     ^^^^
+
+              foo in Foo(*bar, baz, *qux)
+                         ^^^^
       - name: requireds
         type: node[]
         kind: pattern expression
+        comment: |
+          Represents the nodes in between the wildcards.
+
+              foo in *bar, baz, *qux
+                           ^^^
+
+              foo in Foo(*bar, baz, 1, *qux)
+                               ^^^^^^
       - name: right
         type: node
-        kind:
-          - SplatNode
-          - on error: MissingNode
+        kind: SplatNode
+        comment: |
+          Represents the second wildcard node in the pattern.
+
+              foo in *bar, baz, *qux
+                                ^^^^
+
+              foo in Foo(*bar, baz, *qux)
+                                    ^^^^
       - name: opening_loc
         type: location?
+        comment: |
+          The Location of the opening bracket or parenthesis.
+
+              foo in [*bar, baz, *qux]
+                     ^
+
+              foo in Foo(*bar, baz, *qux)
+                        ^
       - name: closing_loc
         type: location?
+        comment: |
+          The Location of the closing bracket or parenthesis.
+
+              foo in [*bar, baz, *qux]
+                                     ^
+
+              foo in Foo(*bar, baz, *qux)
+                                        ^
     comment: |
       Represents a find pattern in pattern matching.
 
@@ -2402,6 +2513,9 @@ nodes:
 
           foo in Foo(*bar, baz, *qux)
                  ^^^^^^^^^^^^^^^^^^^^
+
+          foo => *bar, baz, *qux
+                 ^^^^^^^^^^^^^^^
   - name: FlipFlopNode
     flags: RangeFlags
     fields:
@@ -2442,9 +2556,6 @@ nodes:
           - CallTargetNode
           - IndexTargetNode
           - MultiTargetNode
-          - on error: BackReferenceReadNode # for $& in a end
-          - on error: NumberedReferenceReadNode # for $1 in a end
-          - on error: MissingNode # for in 1..10; end
         comment: |
           The index expression for `for` loops.
 
@@ -2471,28 +2582,28 @@ nodes:
       - name: for_keyword_loc
         type: location
         comment: |
-          The location of the `for` keyword.
+          The Location of the `for` keyword.
 
               for i in a end
               ^^^
       - name: in_keyword_loc
         type: location
         comment: |
-          The location of the `in` keyword.
+          The Location of the `in` keyword.
 
               for i in a end
                     ^^
       - name: do_keyword_loc
         type: location?
         comment: |
-          The location of the `do` keyword, if present.
+          The Location of the `do` keyword, if present.
 
               for i in a do end
                          ^^
       - name: end_keyword_loc
         type: location
         comment: |
-          The location of the `end` keyword.
+          The Location of the `end` keyword.
 
               for i in a end
                          ^^^
@@ -2518,14 +2629,29 @@ nodes:
           end
   - name: ForwardingSuperNode
     fields:
+      - name: keyword_loc
+        type: location
+        comment: |
+          super
+          ^^^^^
+
+          super { 123 }
+          ^^^^^
       - name: block
         type: node?
         kind: BlockNode
+        comment: |
+          All other arguments are forwarded as normal, except the original block is replaced with the new block.
     comment: |
-      Represents the use of the `super` keyword without parentheses or arguments.
+      Represents the use of the `super` keyword without parentheses or arguments, but which might have a block.
 
           super
           ^^^^^
+
+          super { 123 }
+          ^^^^^^^^^^^^^
+
+      If it has any other arguments, it would be a `SuperNode` instead.
   - name: GlobalVariableAndWriteNode
     fields:
       - name: name
@@ -2613,7 +2739,7 @@ nodes:
       - name: name_loc
         type: location
         comment: |
-          The location of the global variable's name.
+          The Location of the global variable's name.
 
               $foo = :bar
               ^^^^
@@ -2631,7 +2757,7 @@ nodes:
       - name: operator_loc
         type: location
         comment: |
-          The location of the `=` operator.
+          The Location of the `=` operator.
 
               $foo = :bar
                    ^
@@ -2645,7 +2771,7 @@ nodes:
       - name: opening_loc
         type: location
         comment: |
-          The location of the opening brace.
+          The Location of the opening brace.
 
               { a => b }
               ^
@@ -2665,7 +2791,7 @@ nodes:
       - name: closing_loc
         type: location
         comment: |
-          The location of the closing brace.
+          The Location of the closing brace.
 
               { a => b }
                        ^
@@ -2679,20 +2805,60 @@ nodes:
       - name: constant
         type: node?
         kind:
-          - ConstantReadNode
           - ConstantPathNode
+          - ConstantReadNode
+        comment: |
+          Represents the optional constant preceding the Hash.
+
+              foo => Bar[a: 1, b: 2]
+                     ^^^
+
+              foo => Bar::Baz[a: 1, b: 2]
+                     ^^^^^^^^
       - name: elements
         type: node[]
         kind: AssocNode
+        comment: |
+          Represents the explicit named hash keys and values.
+
+              foo => { a: 1, b:, ** }
+                       ^^^^^^^^
       - name: rest
         type: node?
         kind:
           - AssocSplatNode
           - NoKeywordsParameterNode
+        comment: |
+          Represents the rest of the Hash keys and values. This can be named, unnamed, or explicitly forbidden via `**nil`; the last form results in a `NoKeywordsParameterNode`.
+
+              foo => { a: 1, b:, **c }
+                                 ^^^
+
+              foo => { a: 1, b:, ** }
+                                 ^^
+
+              foo => { a: 1, b:, **nil }
+                                 ^^^^^
       - name: opening_loc
         type: location?
+        comment: |
+          The Location of the opening brace or bracket.
+
+              foo => { a: 1 }
+                     ^
+
+              foo => Bar[a: 1]
+                        ^
       - name: closing_loc
         type: location?
+        comment: |
+          The Location of the closing brace or bracket.
+
+              foo => { a: 1 }
+                            ^
+
+              foo => Bar[a: 1]
+                             ^
     comment: |
       Represents a hash pattern in pattern matching.
 
@@ -2701,12 +2867,18 @@ nodes:
 
           foo => { a: 1, b: 2, **c }
                  ^^^^^^^^^^^^^^^^^^^
+
+          foo => Bar[a: 1, b: 2]
+                 ^^^^^^^^^^^^^^^
+
+          foo in { a: 1, b: 2 }
+                 ^^^^^^^^^^^^^^
   - name: IfNode
     fields:
       - name: if_keyword_loc
         type: location?
         comment: |
-          The location of the `if` keyword if present.
+          The Location of the `if` keyword if present.
 
               bar if foo
                   ^^
@@ -2731,7 +2903,7 @@ nodes:
       - name: then_keyword_loc
         type: location?
         comment: |
-          The location of the `then` keyword (if present) or the `?` in a ternary expression, `nil` otherwise.
+          The Location of the `then` keyword (if present) or the `?` in a ternary expression, `nil` otherwise.
 
               if foo then bar end
                      ^^^^
@@ -2772,7 +2944,7 @@ nodes:
       - name: end_keyword_loc
         type: location?
         comment: |
-          The location of the `end` keyword if present, `nil` otherwise.
+          The Location of the `end` keyword if present, `nil` otherwise.
 
               if foo
                 bar
@@ -3057,7 +3229,7 @@ nodes:
       - name: name_loc
         type: location
         comment: |
-          The location of the variable name.
+          The Location of the variable name.
 
               @_x = 1
               ^^^
@@ -3075,7 +3247,7 @@ nodes:
       - name: operator_loc
         type: location
         comment: |
-          The location of the `=` operator.
+          The Location of the `=` operator.
 
               @x = y
                  ^
@@ -3145,7 +3317,6 @@ nodes:
           - EmbeddedStatementsNode
           - EmbeddedVariableNode
           - InterpolatedStringNode # `"a" "#{b}"`
-          - on error: XStringNode # `<<`FOO` "bar"
       - name: closing_loc
         type: location?
     newline: parts
@@ -3353,6 +3524,9 @@ nodes:
 
           foo, bar = baz
           ^^^  ^^^
+
+          foo => baz
+                 ^^^
   - name: LocalVariableWriteNode
     fields:
       - name: name
@@ -3376,7 +3550,7 @@ nodes:
       - name: name_loc
         type: location
         comment: |
-          The location of the variable name.
+          The Location of the variable name.
 
               foo = :bar
               ^^^
@@ -3398,7 +3572,7 @@ nodes:
       - name: operator_loc
         type: location
         comment: |
-          The location of the `=` operator.
+          The Location of the `=` operator.
 
               x = :y
                 ^
@@ -3443,11 +3617,65 @@ nodes:
       - name: value
         type: node
         kind: non-void expression
+        comment: |
+          Represents the left-hand side of the operator.
+
+              foo => bar
+              ^^^
       - name: pattern
         type: node
         kind: pattern expression
+        comment: |
+          Represents the right-hand side of the operator. The type of the node depends on the expression.
+
+          Anything that looks like a local variable name (including `_`) will result in a `LocalVariableTargetNode`.
+
+              foo => a # This is equivalent to writing `a = foo`
+                     ^
+
+          Using an explicit `Array` or combining expressions with `,` will result in an `ArrayPatternNode`. This can be preceded by a constant.
+
+              foo => [a]
+                     ^^^
+
+              foo => a, b
+                     ^^^^
+
+              foo => Bar[a, b]
+                     ^^^^^^^^^
+
+          If the array pattern contains at least two wildcard matches, a `FindPatternNode` is created instead.
+
+              foo => *, 1, *a
+                     ^^^^^^^^
+
+          Using an explicit `Hash` or a constant with square brackets and hash keys in the square brackets will result in a `HashPatternNode`.
+
+              foo => { a: 1, b: }
+
+              foo => Bar[a: 1, b:]
+
+              foo => Bar[**]
+
+          To use any variable that needs run time evaluation, pinning is required. This results in a `PinnedVariableNode`.
+
+              foo => ^a
+                     ^^
+
+          Similarly, any expression can be used with pinning. This results in a `PinnedExpressionNode`.
+
+              foo => ^(a + 1)
+
+          Anything else will result in the regular node for that expression, for example a `ConstantReadNode`.
+
+              foo => CONST
       - name: operator_loc
         type: location
+        comment: |
+          The Location of the operator.
+
+              foo => bar
+                  ^^
     comment: |
       Represents the use of the `=>` operator.
 
@@ -3466,9 +3694,6 @@ nodes:
 
           /(?<foo>bar)/ =~ baz
           ^^^^^^^^^^^^^^^^^^^^
-  - name: MissingNode
-    comment: |
-      Represents a node that is missing from the source and results in a syntax error.
   - name: ModuleNode
     fields:
       - name: locals
@@ -3480,7 +3705,6 @@ nodes:
         kind:
           - ConstantReadNode
           - ConstantPathNode
-          - on error: MissingNode # module Parent module end
       - name: body
         type: node?
         kind:
@@ -3510,8 +3734,6 @@ nodes:
           - IndexTargetNode
           - MultiTargetNode
           - RequiredParameterNode # def m((a,b)); end
-          - on error: BackReferenceReadNode # a, (b, $&) = z
-          - on error: NumberedReferenceReadNode # a, (b, $1) = z
         comment: |
           Represents the targets expressions before a splat node.
 
@@ -3555,8 +3777,6 @@ nodes:
           - IndexTargetNode
           - MultiTargetNode
           - RequiredParameterNode # def m((*,b)); end
-          - on error: BackReferenceReadNode # a, (*, $&) = z
-          - on error: NumberedReferenceReadNode # a, (*, $1) = z
         comment: |
           Represents the targets expressions after a splat node.
 
@@ -3565,14 +3785,14 @@ nodes:
       - name: lparen_loc
         type: location?
         comment: |
-          The location of the opening parenthesis.
+          The Location of the opening parenthesis.
 
               a, (b, c) = 1, 2, 3
                  ^
       - name: rparen_loc
         type: location?
         comment: |
-          The location of the closing parenthesis.
+          The Location of the closing parenthesis.
 
               a, (b, c) = 1, 2, 3
                       ^
@@ -3600,8 +3820,6 @@ nodes:
           - CallTargetNode
           - IndexTargetNode
           - MultiTargetNode
-          - on error: BackReferenceReadNode # $&, = z
-          - on error: NumberedReferenceReadNode # $1, = z
         comment: |
           Represents the targets expressions before a splat node.
 
@@ -3644,8 +3862,6 @@ nodes:
           - CallTargetNode
           - IndexTargetNode
           - MultiTargetNode
-          - on error: BackReferenceReadNode # *, $& = z
-          - on error: NumberedReferenceReadNode # *, $1 = z
         comment: |
           Represents the targets expressions after a splat node.
 
@@ -3654,21 +3870,21 @@ nodes:
       - name: lparen_loc
         type: location?
         comment: |
-          The location of the opening parenthesis.
+          The Location of the opening parenthesis.
 
               (a, b, c) = 1, 2, 3
               ^
       - name: rparen_loc
         type: location?
         comment: |
-          The location of the closing parenthesis.
+          The Location of the closing parenthesis.
 
               (a, b, c) = 1, 2, 3
                       ^
       - name: operator_loc
         type: location
         comment: |
-          The location of the operator.
+          The Location of the operator.
 
               a, b, c = 1, 2, 3
                       ^
@@ -3703,6 +3919,18 @@ nodes:
 
           nil
           ^^^
+  - name: NoBlockParameterNode
+    fields:
+      - name: operator_loc
+        type: location
+      - name: keyword_loc
+        type: location
+    comment: |
+      Represents the use of `&nil` inside method arguments.
+
+          def a(&nil)
+                ^^^^
+          end
   - name: NoKeywordsParameterNode
     fields:
       - name: operator_loc
@@ -3802,7 +4030,7 @@ nodes:
       - name: operator_loc
         type: location
         comment: |
-          The location of the `or` keyword or the `||` operator.
+          The Location of the `or` keyword or the `||` operator.
 
               left or right
                    ^^
@@ -3831,11 +4059,6 @@ nodes:
         kind:
           - RequiredParameterNode
           - MultiTargetNode
-          # On parsing error of `f(**kwargs, ...)` or `f(**nil, ...)`, the keyword_rest value is moved here:
-          - on error: KeywordRestParameterNode
-          - on error: NoKeywordsParameterNode
-          # On parsing error of `f(..., ...)`, the first forwarding parameter is moved here:
-          - on error: ForwardingParameterNode
       - name: keywords
         type: node[]
         kind:
@@ -3849,7 +4072,9 @@ nodes:
           - NoKeywordsParameterNode
       - name: block
         type: node?
-        kind: BlockParameterNode
+        kind:
+          - BlockParameterNode
+          - NoBlockParameterNode
     comment: |
       Represents the list of parameters on a method, block, or lambda definition.
 
@@ -3877,12 +4102,32 @@ nodes:
       - name: expression
         type: node
         kind: non-void expression
+        comment: |
+          The expression used in the pinned expression
+
+              foo in ^(bar)
+                       ^^^
       - name: operator_loc
         type: location
+        comment: |
+          The Location of the `^` operator
+
+              foo in ^(bar)
+                     ^
       - name: lparen_loc
         type: location
+        comment: |
+          The Location of the opening parenthesis.
+
+              foo in ^(bar)
+                      ^
       - name: rparen_loc
         type: location
+        comment: |
+          The Location of the closing parenthesis.
+
+              foo in ^(bar)
+                          ^
     comment: |
       Represents the use of the `^` operator for pinning an expression in a pattern matching expression.
 
@@ -3900,9 +4145,18 @@ nodes:
           - BackReferenceReadNode # foo in ^$&
           - NumberedReferenceReadNode # foo in ^$1
           - ItLocalVariableReadNode # proc { 1 in ^it }
-          - on error: MissingNode # foo in ^Bar
+        comment: |
+          The variable used in the pinned expression
+
+              foo in ^bar
+                      ^^^
       - name: operator_loc
         type: location
+        comment: |
+          The Location of the `^` operator
+
+              foo in ^bar
+                     ^
     comment: |
       Represents the use of the `^` operator for pinning a variable in a pattern matching expression.
 
@@ -3973,11 +4227,11 @@ nodes:
 
               1...foo
                   ^^^
-          If neither right-hand or left-hand side was included, this will be a MissingNode.
+          If neither the right-hand nor the left-hand side was included, this will be an ErrorRecoveryNode.
       - name: operator_loc
         type: location
         comment: |
-          The location of the `..` or `...` operator.
+          The Location of the `..` or `...` operator.
     comment: |
       Represents the use of the `..` or `...` operators.
 
@@ -4088,9 +4342,6 @@ nodes:
           - ConstantPathTargetNode
           - CallTargetNode
           - IndexTargetNode
-          - on error: BackReferenceReadNode # => begin; rescue => $&; end
-          - on error: NumberedReferenceReadNode # => begin; rescue => $1; end
-          - on error: MissingNode # begin; rescue =>; end
       - name: then_keyword_loc
         type: location?
       - name: statements
@@ -4203,7 +4454,7 @@ nodes:
     fields:
       - name: filepath
         type: string
-        comment: Represents the file path being parsed. This corresponds directly to the `filepath` option given to the various `Prism::parse*` APIs.
+        comment: Represents the file path being parsed. This corresponds directly to the `filepath` option given to the various `Prism.parse*` APIs.
     comment: |
       Represents the use of the `__FILE__` keyword.
 
@@ -4268,6 +4519,7 @@ nodes:
       - name: arguments
         type: node?
         kind: ArgumentsNode
+        comment: "Can only be `nil` when there are empty parentheses, like `super()`."
       - name: rparen_loc
         type: location?
       - name: block
@@ -4283,6 +4535,8 @@ nodes:
 
           super foo, bar
           ^^^^^^^^^^^^^^
+
+      If no arguments are provided (except for a block), it would be a `ForwardingSuperNode` instead.
   - name: SymbolNode
     flags: SymbolFlags
     fields:
@@ -4327,7 +4581,7 @@ nodes:
       - name: keyword_loc
         type: location
         comment: |
-          The location of the `unless` keyword.
+          The Location of the `unless` keyword.
 
               unless cond then bar end
               ^^^^^^
@@ -4348,7 +4602,7 @@ nodes:
       - name: then_keyword_loc
         type: location?
         comment: |
-          The location of the `then` keyword, if present.
+          The Location of the `then` keyword, if present.
 
               unless cond then bar end
                           ^^^^
@@ -4368,11 +4622,11 @@ nodes:
           The else clause of the unless expression, if present.
 
               unless cond then bar else baz end
-                                   ^^^^^^^^
+                                   ^^^^^^^^^^^^
       - name: end_keyword_loc
         type: location?
         comment: |
-          The location of the `end` keyword, if present.
+          The Location of the `end` keyword, if present.
 
               unless cond then bar end
                                    ^^^
diff --git a/prism/util/pm_constant_pool.c b/prism/constant_pool.c
index 38ea01a228..90201ebb8e 100644
--- a/prism/util/pm_constant_pool.c
+++ b/prism/constant_pool.c
@@ -1,4 +1,11 @@
-#include "prism/util/pm_constant_pool.h"
+#include "prism/internal/constant_pool.h"
+
+#include "prism/compiler/align.h"
+#include "prism/compiler/inline.h"
+#include "prism/internal/arena.h"
+
+#include <assert.h>
+#include <stdbool.h>
 
 /**
  * Initialize a list of constant ids.
@@ -14,10 +21,9 @@ pm_constant_id_list_init(pm_constant_id_list_t *list) {
  * Initialize a list of constant ids with a given capacity.
  */
 void
-pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity) {
+pm_constant_id_list_init_capacity(pm_arena_t *arena, pm_constant_id_list_t *list, size_t capacity) {
     if (capacity) {
-        list->ids = xcalloc(capacity, sizeof(pm_constant_id_t));
-        if (list->ids == NULL) abort();
+        list->ids = (pm_constant_id_t *) pm_arena_zalloc(arena, capacity * sizeof(pm_constant_id_t), PRISM_ALIGNOF(pm_constant_id_t));
     } else {
         list->ids = NULL;
     }
@@ -27,19 +33,23 @@ pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity)
 }
 
 /**
- * Append a constant id to a list of constant ids. Returns false if any
- * potential reallocations fail.
+ * Append a constant id to a list of constant ids.
  */
-bool
-pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id) {
+void
+pm_constant_id_list_append(pm_arena_t *arena, pm_constant_id_list_t *list, pm_constant_id_t id) {
     if (list->size >= list->capacity) {
-        list->capacity = list->capacity == 0 ? 8 : list->capacity * 2;
-        list->ids = (pm_constant_id_t *) xrealloc(list->ids, sizeof(pm_constant_id_t) * list->capacity);
-        if (list->ids == NULL) return false;
+        size_t new_capacity = list->capacity == 0 ? 8 : list->capacity * 2;
+        pm_constant_id_t *new_ids = (pm_constant_id_t *) pm_arena_alloc(arena, sizeof(pm_constant_id_t) * new_capacity, PRISM_ALIGNOF(pm_constant_id_t));
+
+        if (list->size > 0) {
+            memcpy(new_ids, list->ids, list->size * sizeof(pm_constant_id_t));
+        }
+
+        list->ids = new_ids;
+        list->capacity = new_capacity;
     }
 
     list->ids[list->size++] = id;
-    return true;
 }
 
 /**
@@ -66,29 +76,66 @@ pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) {
 }
 
 /**
- * Free the memory associated with a list of constant ids.
+ * A multiply-xorshift hash that processes input a word at a time. This is
+ * significantly faster than the byte-at-a-time djb2 hash for the short strings
+ * typical in Ruby source (~15 bytes average). Each word is mixed into the hash
+ * by XOR followed by multiplication by a large odd constant, which spreads
+ * entropy across all bits. A final xorshift fold produces the 32-bit result.
  */
-void
-pm_constant_id_list_free(pm_constant_id_list_t *list) {
-    if (list->ids != NULL) {
-        xfree(list->ids);
-    }
-}
-
-/**
- * A relatively simple hash function (djb2) that is used to hash strings. We are
- * optimizing here for simplicity and speed.
- */
-static inline uint32_t
+static PRISM_INLINE uint32_t
 pm_constant_pool_hash(const uint8_t *start, size_t length) {
-    // This is a prime number used as the initial value for the hash function.
-    uint32_t value = 5381;
+    // This constant is borrowed from wyhash. It is a 64-bit odd integer with
+    // roughly equal 0/1 bits, chosen for good avalanche behavior when used in
+    // multiply-xorshift sequences.
+    static const uint64_t secret = 0x517cc1b727220a95ULL;
+    uint64_t hash = (uint64_t) length;
+
+    if (length <= 8) {
+        // Short strings: read first and last 4 bytes (overlapping for len < 8).
+        // This covers the majority of Ruby identifiers with a single multiply.
+        if (length >= 4) {
+            uint32_t a, b;
+            memcpy(&a, start, 4);
+            memcpy(&b, start + length - 4, 4);
+            hash ^= (uint64_t) a | ((uint64_t) b << 32);
+        } else if (length > 0) {
+            hash ^= (uint64_t) start[0] | ((uint64_t) start[length >> 1] << 8) | ((uint64_t) start[length - 1] << 16);
+        }
+        hash *= secret;
+    } else if (length <= 16) {
+        // Medium strings: read first and last 8 bytes (overlapping).
+        // Two multiplies instead of the three the loop-based approach needs.
+        uint64_t word;
+        memcpy(&word, start, 8);
+        hash ^= word;
+        hash *= secret;
+        memcpy(&word, start + length - 8, 8);
+        hash ^= word;
+        hash *= secret;
+    } else {
+        const uint8_t *ptr = start;
+        size_t remaining = length;
+
+        while (remaining >= 8) {
+            uint64_t word;
+            memcpy(&word, ptr, 8);
+            hash ^= word;
+            hash *= secret;
+            ptr += 8;
+            remaining -= 8;
+        }
 
-    for (size_t index = 0; index < length; index++) {
-        value = ((value << 5) + value) + start[index];
+        if (remaining > 0) {
+            // Read the last 8 bytes (overlapping with already-processed data).
+            uint64_t word;
+            memcpy(&word, start + length - 8, 8);
+            hash ^= word;
+            hash *= secret;
+        }
     }
 
-    return value;
+    hash ^= hash >> 32;
+    return (uint32_t) hash;
 }
 
 /**
@@ -121,21 +168,15 @@ is_power_of_two(uint32_t size) {
 /**
  * Resize a constant pool to a given capacity.
  */
-static inline bool
-pm_constant_pool_resize(pm_constant_pool_t *pool) {
+static PRISM_INLINE void
+pm_constant_pool_resize(pm_arena_t *arena, pm_constant_pool_t *pool) {
     assert(is_power_of_two(pool->capacity));
 
     uint32_t next_capacity = pool->capacity * 2;
-    if (next_capacity < pool->capacity) return false;
-
     const uint32_t mask = next_capacity - 1;
-    const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
-
-    void *next = xcalloc(next_capacity, element_size);
-    if (next == NULL) return false;
 
-    pm_constant_pool_bucket_t *next_buckets = next;
-    pm_constant_t *next_constants = (void *)(((char *) next) + next_capacity * sizeof(pm_constant_pool_bucket_t));
+    pm_constant_pool_bucket_t *next_buckets = (pm_constant_pool_bucket_t *) pm_arena_zalloc(arena, next_capacity * sizeof(pm_constant_pool_bucket_t), PRISM_ALIGNOF(pm_constant_pool_bucket_t));
+    pm_constant_t *next_constants = (pm_constant_t *) pm_arena_alloc(arena, next_capacity * sizeof(pm_constant_t), PRISM_ALIGNOF(pm_constant_t));
 
     // For each bucket in the current constant pool, find the index in the
     // next constant pool, and insert it.
@@ -163,33 +204,22 @@ pm_constant_pool_resize(pm_constant_pool_t *pool) {
     // The constants are stable with respect to hash table resizes.
     memcpy(next_constants, pool->constants, pool->size * sizeof(pm_constant_t));
 
-    // pool->constants and pool->buckets are allocated out of the same chunk
-    // of memory, with the buckets coming first.
-    xfree(pool->buckets);
     pool->constants = next_constants;
     pool->buckets = next_buckets;
     pool->capacity = next_capacity;
-    return true;
 }
 
 /**
  * Initialize a new constant pool with a given capacity.
  */
-bool
-pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
-    const uint32_t maximum = (~((uint32_t) 0));
-    if (capacity >= ((maximum / 2) + 1)) return false;
-
+void
+pm_constant_pool_init(pm_arena_t *arena, pm_constant_pool_t *pool, uint32_t capacity) {
     capacity = next_power_of_two(capacity);
-    const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
-    void *memory = xcalloc(capacity, element_size);
-    if (memory == NULL) return false;
 
-    pool->buckets = memory;
-    pool->constants = (void *)(((char *)memory) + capacity * sizeof(pm_constant_pool_bucket_t));
+    pool->buckets = (pm_constant_pool_bucket_t *) pm_arena_zalloc(arena, capacity * sizeof(pm_constant_pool_bucket_t), PRISM_ALIGNOF(pm_constant_pool_bucket_t));
+    pool->constants = (pm_constant_t *) pm_arena_alloc(arena, capacity * sizeof(pm_constant_t), PRISM_ALIGNOF(pm_constant_t));
     pool->size = 0;
     pool->capacity = capacity;
-    return true;
 }
 
 /**
@@ -215,8 +245,7 @@ pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size
     pm_constant_pool_bucket_t *bucket;
 
     while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
-        pm_constant_t *constant = &pool->constants[bucket->id - 1];
-        if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
+        if ((bucket->length == length) && memcmp(bucket->start, start, length) == 0) {
             return bucket->id;
         }
 
@@ -229,10 +258,10 @@ pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size
 /**
  * Insert a constant into a constant pool and return its index in the pool.
  */
-static inline pm_constant_id_t
-pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
+static PRISM_INLINE pm_constant_id_t
+pm_constant_pool_insert(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
     if (pool->size >= (pool->capacity / 4 * 3)) {
-        if (!pm_constant_pool_resize(pool)) return PM_CONSTANT_ID_UNSET;
+        pm_constant_pool_resize(arena, pool);
     }
 
     assert(is_power_of_two(pool->capacity));
@@ -246,25 +275,17 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
         // If there is a collision, then we need to check if the content is the
         // same as the content we are trying to insert. If it is, then we can
         // return the id of the existing constant.
-        pm_constant_t *constant = &pool->constants[bucket->id - 1];
-
-        if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
+        if ((bucket->length == length) && memcmp(bucket->start, start, length) == 0) {
             // Since we have found a match, we need to check if this is
             // attempting to insert a shared or an owned constant. We want to
             // prefer shared constants since they don't require allocations.
-            if (type == PM_CONSTANT_POOL_BUCKET_OWNED) {
-                // If we're attempting to insert an owned constant and we have
-                // an existing constant, then either way we don't want the given
-                // memory. Either it's duplicated with the existing constant or
-                // it's not necessary because we have a shared version.
-                xfree((void *) start);
-            } else if (bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
+            if (type != PM_CONSTANT_POOL_BUCKET_OWNED && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
                 // If we're attempting to insert a shared constant and the
-                // existing constant is owned, then we can free the owned
-                // constant and replace it with the shared constant.
-                xfree((void *) constant->start);
-                constant->start = start;
-                bucket->type = (unsigned int) (PM_CONSTANT_POOL_BUCKET_DEFAULT & 0x3);
+                // existing constant is owned, then we can replace it with the
+                // shared constant to prefer non-owned references.
+                bucket->start = start;
+                bucket->type = (unsigned int) (type & 0x3);
+                pool->constants[bucket->id - 1].start = start;
             }
 
             return bucket->id;
@@ -281,7 +302,9 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
     *bucket = (pm_constant_pool_bucket_t) {
         .id = (unsigned int) (id & 0x3fffffff),
         .type = (unsigned int) (type & 0x3),
-        .hash = hash
+        .hash = hash,
+        .start = start,
+        .length = length
     };
 
     pool->constants[id - 1] = (pm_constant_t) {
@@ -297,8 +320,8 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
  * PM_CONSTANT_ID_UNSET if any potential calls to resize fail.
  */
 pm_constant_id_t
-pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
-    return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_DEFAULT);
+pm_constant_pool_insert_shared(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
+    return pm_constant_pool_insert(arena, pool, start, length, PM_CONSTANT_POOL_BUCKET_DEFAULT);
 }
 
 /**
@@ -307,8 +330,8 @@ pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, s
  * potential calls to resize fail.
  */
 pm_constant_id_t
-pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length) {
-    return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_OWNED);
+pm_constant_pool_insert_owned(pm_arena_t *arena, pm_constant_pool_t *pool, uint8_t *start, size_t length) {
+    return pm_constant_pool_insert(arena, pool, start, length, PM_CONSTANT_POOL_BUCKET_OWNED);
 }
 
 /**
@@ -317,26 +340,21 @@ pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t l
  * resize fail.
  */
 pm_constant_id_t
-pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
-    return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_CONSTANT);
+pm_constant_pool_insert_constant(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
+    return pm_constant_pool_insert(arena, pool, start, length, PM_CONSTANT_POOL_BUCKET_CONSTANT);
 }
 
 /**
- * Free the memory associated with a constant pool.
+ * Return a raw pointer to the start of a constant.
  */
-void
-pm_constant_pool_free(pm_constant_pool_t *pool) {
-    // For each constant in the current constant pool, free the contents if the
-    // contents are owned.
-    for (uint32_t index = 0; index < pool->capacity; index++) {
-        pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
-
-        // If an id is set on this constant, then we know we have content here.
-        if (bucket->id != PM_CONSTANT_ID_UNSET && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
-            pm_constant_t *constant = &pool->constants[bucket->id - 1];
-            xfree((void *) constant->start);
-        }
-    }
+const uint8_t *
+pm_constant_start(const pm_constant_t *constant) {
+    return constant->start;
+}
 
-    xfree(pool->buckets);
+/**
+ * Return the length of a constant.
+ */
+size_t pm_constant_length(const pm_constant_t *constant) {
+    return constant->length;
 }
diff --git a/prism/constant_pool.h b/prism/constant_pool.h
new file mode 100644
index 0000000000..dc03235c70
--- /dev/null
+++ b/prism/constant_pool.h
@@ -0,0 +1,81 @@
+/**
+ * @file constant_pool.h
+ *
+ * A data structure that stores a set of strings.
+ *
+ * Each string is assigned a unique id, which can be used to compare strings for
+ * equality. This comparison ends up being much faster than strcmp, since it
+ * only requires a single integer comparison.
+ */
+#ifndef PRISM_CONSTANT_POOL_H
+#define PRISM_CONSTANT_POOL_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nodiscard.h"
+#include "prism/compiler/nonnull.h"
+
+#include "prism/arena.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * A constant id is a unique identifier for a constant in the constant pool.
+ */
+typedef uint32_t pm_constant_id_t;
+
+/**
+ * A list of constant IDs. Usually used to represent a set of locals.
+ */
+typedef struct {
+    /** The number of constant ids in the list. */
+    size_t size;
+
+    /** The number of constant ids that have been allocated in the list. */
+    size_t capacity;
+
+    /** The constant ids in the list. */
+    pm_constant_id_t *ids;
+} pm_constant_id_list_t;
+
+/** A constant in the pool which effectively stores a string. */
+typedef struct pm_constant_t pm_constant_t;
+
+/**
+ * The overall constant pool, which stores constants found while parsing.
+ */
+typedef struct pm_constant_pool_t pm_constant_pool_t;
+
+/**
+ * Return a raw pointer to the start of a constant.
+ *
+ * @param constant The constant to get the start of.
+ * @returns A raw pointer to the start of the constant.
+ */
+PRISM_EXPORTED_FUNCTION const uint8_t * pm_constant_start(const pm_constant_t *constant) PRISM_NONNULL(1);
+
+/**
+ * Return the length of a constant.
+ *
+ * @param constant The constant to get the length of.
+ * @returns The length of the constant.
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_constant_length(const pm_constant_t *constant) PRISM_NONNULL(1);
+
+/**
+ * Initialize a list of constant ids.
+ *
+ * @param list The list to initialize.
+ */
+PRISM_EXPORTED_FUNCTION void pm_constant_id_list_init(pm_constant_id_list_t *list) PRISM_NONNULL(1);
+
+/**
+ * Append a constant id to a list of constant ids.
+ *
+ * @param arena The arena to use for allocations.
+ * @param list The list to append to.
+ * @param id The constant id to append.
+ */
+PRISM_EXPORTED_FUNCTION void pm_constant_id_list_append(pm_arena_t *arena, pm_constant_id_list_t *list, pm_constant_id_t id) PRISM_NONNULL(1, 2);
+
+#endif
diff --git a/prism/defines.h b/prism/defines.h
deleted file mode 100644
index e31429c789..0000000000
--- a/prism/defines.h
+++ /dev/null
@@ -1,260 +0,0 @@
-/**
- * @file defines.h
- *
- * Macro definitions used throughout the prism library.
- *
- * This file should be included first by any *.h or *.c in prism for consistency
- * and to ensure that the macros are defined before they are used.
- */
-#ifndef PRISM_DEFINES_H
-#define PRISM_DEFINES_H
-
-#include <ctype.h>
-#include <limits.h>
-#include <math.h>
-#include <stdarg.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
-
-/**
- * We want to be able to use the PRI* macros for printing out integers, but on
- * some platforms they aren't included unless this is already defined.
- */
-#define __STDC_FORMAT_MACROS
-// Include sys/types.h before inttypes.h to work around issue with
-// certain versions of GCC and newlib which causes omission of PRIx64
-#include <sys/types.h>
-#include <inttypes.h>
-
-/**
- * When we are parsing using recursive descent, we want to protect against
- * malicious payloads that could attempt to crash our parser. We do this by
- * specifying a maximum depth to which we are allowed to recurse.
- */
-#ifndef PRISM_DEPTH_MAXIMUM
-    #define PRISM_DEPTH_MAXIMUM 10000
-#endif
-
-/**
- * By default, we compile with -fvisibility=hidden. When this is enabled, we
- * need to mark certain functions as being publically-visible. This macro does
- * that in a compiler-agnostic way.
- */
-#ifndef PRISM_EXPORTED_FUNCTION
-#   ifdef PRISM_EXPORT_SYMBOLS
-#       ifdef _WIN32
-#          define PRISM_EXPORTED_FUNCTION __declspec(dllexport) extern
-#       else
-#          define PRISM_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern
-#       endif
-#   else
-#       define PRISM_EXPORTED_FUNCTION
-#   endif
-#endif
-
-/**
- * Certain compilers support specifying that a function accepts variadic
- * parameters that look like printf format strings to provide a better developer
- * experience when someone is using the function. This macro does that in a
- * compiler-agnostic way.
- */
-#if defined(__GNUC__)
-#   if defined(__MINGW_PRINTF_FORMAT)
-#       define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(__MINGW_PRINTF_FORMAT, string_index, argument_index)))
-#   else
-#       define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(printf, string_index, argument_index)))
-#   endif
-#elif defined(__clang__)
-#   define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((__format__(__printf__, string_index, argument_index)))
-#else
-#   define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index)
-#endif
-
-/**
- * GCC will warn if you specify a function or parameter that is unused at
- * runtime. This macro allows you to mark a function or parameter as unused in a
- * compiler-agnostic way.
- */
-#if defined(__GNUC__)
-#   define PRISM_ATTRIBUTE_UNUSED __attribute__((unused))
-#else
-#   define PRISM_ATTRIBUTE_UNUSED
-#endif
-
-/**
- * Old Visual Studio versions do not support the inline keyword, so we need to
- * define it to be __inline.
- */
-#if defined(_MSC_VER) && !defined(inline)
-#   define inline __inline
-#endif
-
-/**
- * Old Visual Studio versions before 2015 do not implement sprintf, but instead
- * implement _snprintf. We standard that here.
- */
-#if !defined(snprintf) && defined(_MSC_VER) && (_MSC_VER < 1900)
-#   define snprintf _snprintf
-#endif
-
-/**
- * A simple utility macro to concatenate two tokens together, necessary when one
- * of the tokens is itself a macro.
- */
-#define PM_CONCATENATE(left, right) left ## right
-
-/**
- * We want to be able to use static assertions, but they weren't standardized
- * until C11. As such, we polyfill it here by making a hacky typedef that will
- * fail to compile due to a negative array size if the condition is false.
- */
-#if defined(_Static_assert)
-#   define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message)
-#else
-#   define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
-#endif
-
-/**
- * In general, libc for embedded systems does not support memory-mapped files.
- * If the target platform is POSIX or Windows, we can map a file in memory and
- * read it in a more efficient manner.
- */
-#ifdef _WIN32
-#   define PRISM_HAS_MMAP
-#else
-#   include <unistd.h>
-#   ifdef _POSIX_MAPPED_FILES
-#       define PRISM_HAS_MMAP
-#   endif
-#endif
-
-/**
- * If PRISM_HAS_NO_FILESYSTEM is defined, then we want to exclude all filesystem
- * related code from the library. All filesystem related code should be guarded
- * by PRISM_HAS_FILESYSTEM.
- */
-#ifndef PRISM_HAS_NO_FILESYSTEM
-#   define PRISM_HAS_FILESYSTEM
-#endif
-
-/**
- * isinf on POSIX systems it accepts a float, a double, or a long double.
- * But mingw didn't provide an isinf macro, only an isinf function that only
- * accepts floats, so we need to use _finite instead.
- */
-#ifdef __MINGW64__
-    #include <float.h>
-    #define PRISM_ISINF(x) (!_finite(x))
-#else
-    #define PRISM_ISINF(x) isinf(x)
-#endif
-
-/**
- * If you build prism with a custom allocator, configure it with
- * "-D PRISM_XALLOCATOR" to use your own allocator that defines xmalloc,
- * xrealloc, xcalloc, and xfree.
- *
- * For example, your `prism_xallocator.h` file could look like this:
- *
- * ```
- * #ifndef PRISM_XALLOCATOR_H
- * #define PRISM_XALLOCATOR_H
- * #define xmalloc      my_malloc
- * #define xrealloc     my_realloc
- * #define xcalloc      my_calloc
- * #define xfree        my_free
- * #endif
- * ```
- */
-#ifdef PRISM_XALLOCATOR
-    #include "prism_xallocator.h"
-#else
-    #ifndef xmalloc
-        /**
-         * The malloc function that should be used. This can be overridden with
-         * the PRISM_XALLOCATOR define.
-         */
-        #define xmalloc malloc
-    #endif
-
-    #ifndef xrealloc
-        /**
-         * The realloc function that should be used. This can be overridden with
-         * the PRISM_XALLOCATOR define.
-         */
-        #define xrealloc realloc
-    #endif
-
-    #ifndef xcalloc
-        /**
-         * The calloc function that should be used. This can be overridden with
-         * the PRISM_XALLOCATOR define.
-         */
-        #define xcalloc calloc
-    #endif
-
-    #ifndef xfree
-        /**
-         * The free function that should be used. This can be overridden with the
-         * PRISM_XALLOCATOR define.
-         */
-        #define xfree free
-    #endif
-#endif
-
-/**
- * If PRISM_BUILD_MINIMAL is defined, then we're going to define every possible
- * switch that will turn off certain features of prism.
- */
-#ifdef PRISM_BUILD_MINIMAL
-    /** Exclude the serialization API. */
-    #define PRISM_EXCLUDE_SERIALIZATION
-
-    /** Exclude the JSON serialization API. */
-    #define PRISM_EXCLUDE_JSON
-
-    /** Exclude the Array#pack parser API. */
-    #define PRISM_EXCLUDE_PACK
-
-    /** Exclude the prettyprint API. */
-    #define PRISM_EXCLUDE_PRETTYPRINT
-
-    /** Exclude the full set of encodings, using the minimal only. */
-    #define PRISM_ENCODING_EXCLUDE_FULL
-#endif
-
-/**
- * Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its
- * branch predication.
- */
-#if defined(__GNUC__) || defined(__clang__)
-    /** The compiler should predicate that this branch will be taken. */
-    #define PRISM_LIKELY(x) __builtin_expect(!!(x), 1)
-
-    /** The compiler should predicate that this branch will not be taken. */
-    #define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0)
-#else
-    /** Void because this platform does not support branch prediction hints. */
-    #define PRISM_LIKELY(x)   (x)
-
-    /** Void because this platform does not support branch prediction hints. */
-    #define PRISM_UNLIKELY(x) (x)
-#endif
-
-/**
- * We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
- * Use PRISM_FALLTHROUGH to explicitly annotate cases where the fallthrough is intentional.
- */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L // C23 or later
-    #define PRISM_FALLTHROUGH [[fallthrough]];
-#elif defined(__GNUC__) || defined(__clang__)
-    #define PRISM_FALLTHROUGH __attribute__((fallthrough));
-#elif defined(_MSC_VER)
-    #define PRISM_FALLTHROUGH __fallthrough;
-#else
-    #define PRISM_FALLTHROUGH
-#endif
-
-#endif
diff --git a/prism/diagnostic.h b/prism/diagnostic.h
new file mode 100644
index 0000000000..370061ec56
--- /dev/null
+++ b/prism/diagnostic.h
@@ -0,0 +1,93 @@
+/**
+ * @file diagnostic.h
+ *
+ * A list of diagnostics generated during parsing.
+ */
+#ifndef PRISM_DIAGNOSTIC_H
+#define PRISM_DIAGNOSTIC_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nodiscard.h"
+#include "prism/compiler/nonnull.h"
+
+#include "prism/ast.h"
+
+/**
+ * An opaque type representing a diagnostic generated during parsing.
+ */
+typedef struct pm_diagnostic_t pm_diagnostic_t;
+
+/**
+ * The levels of errors generated during parsing.
+ */
+typedef enum {
+    /** For errors that should raise a syntax error. */
+    PM_ERROR_LEVEL_SYNTAX = 0,
+
+    /** For errors that should raise an argument error. */
+    PM_ERROR_LEVEL_ARGUMENT = 1,
+
+    /** For errors that should raise a load error. */
+    PM_ERROR_LEVEL_LOAD = 2
+} pm_error_level_t;
+
+/**
+ * The levels of warnings generated during parsing.
+ */
+typedef enum {
+    /** For warnings which should be emitted if $VERBOSE != nil. */
+    PM_WARNING_LEVEL_DEFAULT = 0,
+
+    /** For warnings which should be emitted if $VERBOSE == true. */
+    PM_WARNING_LEVEL_VERBOSE = 1
+} pm_warning_level_t;
+
+/**
+ * Get the type of the given diagnostic.
+ *
+ * @param diagnostic The diagnostic to get the type of.
+ * @returns The type of the given diagnostic. Note that this is a string
+ *     representation of an internal ID, and is not meant to be relied upon as a
+ *     stable identifier for the diagnostic. We do not guarantee that these will
+ *     not change in the future. This is meant to be used for debugging and
+ *     error reporting purposes, and not for programmatic checks.
+ */
+PRISM_EXPORTED_FUNCTION const char * pm_diagnostic_type(const pm_diagnostic_t *diagnostic) PRISM_NONNULL(1);
+
+/**
+ * Get the location of the given diagnostic.
+ *
+ * @param diagnostic The diagnostic to get the location of.
+ * @returns The location of the given diagnostic.
+ */
+PRISM_EXPORTED_FUNCTION pm_location_t pm_diagnostic_location(const pm_diagnostic_t *diagnostic) PRISM_NONNULL(1);
+
+/**
+ * Get the message of the given diagnostic.
+ *
+ * @param diagnostic The diagnostic to get the message of.
+ * @returns The message of the given diagnostic.
+ */
+PRISM_EXPORTED_FUNCTION const char * pm_diagnostic_message(const pm_diagnostic_t *diagnostic) PRISM_NONNULL(1);
+
+/**
+ * Get the error level associated with the given diagnostic.
+ *
+ * @param diagnostic The diagnostic to get the error level of.
+ * @returns The error level of the given diagnostic. If the diagnostic was a
+ *     warning, or is in any way not an error, then the return value is
+ *     undefined and should not be relied upon.
+ */
+PRISM_EXPORTED_FUNCTION pm_error_level_t pm_diagnostic_error_level(const pm_diagnostic_t *diagnostic) PRISM_NONNULL(1);
+
+/**
+ * Get the warning level associated with the given diagnostic.
+ *
+ * @param diagnostic The diagnostic to get the warning level of.
+ * @returns The warning level of the given diagnostic. If the diagnostic was an
+ *     error, or is in any way not a warning, then the return value is
+ *     undefined and should not be relied upon.
+ */
+PRISM_EXPORTED_FUNCTION pm_warning_level_t pm_diagnostic_warning_level(const pm_diagnostic_t *diagnostic) PRISM_NONNULL(1);
+
+#endif
diff --git a/prism/encoding.c b/prism/encoding.c
index a4aeed104f..c9c2e13056 100644
--- a/prism/encoding.c
+++ b/prism/encoding.c
@@ -1,8 +1,13 @@
-#include "prism/encoding.h"
+#include "prism/internal/encoding.h"
+
+#include "prism/compiler/unused.h"
+#include "prism/internal/strncasecmp.h"
+
+#include <assert.h>
 
 typedef uint32_t pm_unicode_codepoint_t;
 
-#define UNICODE_ALPHA_CODEPOINTS_LENGTH 1450
+#define UNICODE_ALPHA_CODEPOINTS_LENGTH 1508
 static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEPOINTS_LENGTH] = {
     0x100, 0x2C1,
     0x2C6, 0x2D1,
@@ -10,7 +15,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
     0x2EC, 0x2EC,
     0x2EE, 0x2EE,
     0x345, 0x345,
-    0x370, 0x374,
+    0x363, 0x374,
     0x376, 0x377,
     0x37A, 0x37D,
     0x37F, 0x37F,
@@ -50,7 +55,8 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
     0x840, 0x858,
     0x860, 0x86A,
     0x870, 0x887,
-    0x889, 0x88E,
+    0x889, 0x88F,
+    0x897, 0x897,
     0x8A0, 0x8C9,
     0x8D4, 0x8DF,
     0x8E3, 0x8E9,
@@ -140,7 +146,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
     0xC4A, 0xC4C,
     0xC55, 0xC56,
     0xC58, 0xC5A,
-    0xC5D, 0xC5D,
+    0xC5C, 0xC5D,
     0xC60, 0xC63,
     0xC80, 0xC83,
     0xC85, 0xC8C,
@@ -152,7 +158,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
     0xCC6, 0xCC8,
     0xCCA, 0xCCC,
     0xCD5, 0xCD6,
-    0xCDD, 0xCDE,
+    0xCDC, 0xCDE,
     0xCE0, 0xCE3,
     0xCF1, 0xCF3,
     0xD00, 0xD0C,
@@ -264,7 +270,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
     0x1C00, 0x1C36,
     0x1C4D, 0x1C4F,
     0x1C5A, 0x1C7D,
-    0x1C80, 0x1C88,
+    0x1C80, 0x1C8A,
     0x1C90, 0x1CBA,
     0x1CBD, 0x1CBF,
     0x1CE9, 0x1CEC,
@@ -272,7 +278,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
     0x1CF5, 0x1CF6,
     0x1CFA, 0x1CFA,
     0x1D00, 0x1DBF,
-    0x1DE7, 0x1DF4,
+    0x1DD3, 0x1DF4,
     0x1E00, 0x1F15,
     0x1F18, 0x1F1D,
     0x1F20, 0x1F45,
@@ -352,11 +358,8 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
     0xA67F, 0xA6EF,
     0xA717, 0xA71F,
     0xA722, 0xA788,
-    0xA78B, 0xA7CA,
-    0xA7D0, 0xA7D1,
-    0xA7D3, 0xA7D3,
-    0xA7D5, 0xA7D9,
-    0xA7F2, 0xA805,
+    0xA78B, 0xA7DC,
+    0xA7F1, 0xA805,
     0xA807, 0xA827,
     0xA840, 0xA873,
     0xA880, 0xA8C3,
@@ -446,6 +449,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
     0x105A3, 0x105B1,
     0x105B3, 0x105B9,
     0x105BB, 0x105BC,
+    0x105C0, 0x105F3,
     0x10600, 0x10736,
     0x10740, 0x10755,
     0x10760, 0x10767,
@@ -464,6 +468,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
     0x108F4, 0x108F5,
     0x10900, 0x10915,
     0x10920, 0x10939,
+    0x10940, 0x10959,
     0x10980, 0x109B7,
     0x109BE, 0x109BF,
     0x10A00, 0x10A03,
@@ -483,9 +488,14 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
     0x10C80, 0x10CB2,
     0x10CC0, 0x10CF2,
     0x10D00, 0x10D27,
+    0x10D4A, 0x10D65,
+    0x10D69, 0x10D69,
+    0x10D6F, 0x10D85,
     0x10E80, 0x10EA9,
     0x10EAB, 0x10EAC,
     0x10EB0, 0x10EB1,
+    0x10EC2, 0x10EC7,
+    0x10EFA, 0x10EFC,
     0x10F00, 0x10F1C,
     0x10F27, 0x10F27,
     0x10F30, 0x10F45,
@@ -529,6 +539,17 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
     0x11350, 0x11350,
     0x11357, 0x11357,
     0x1135D, 0x11363,
+    0x11380, 0x11389,
+    0x1138B, 0x1138B,
+    0x1138E, 0x1138E,
+    0x11390, 0x113B5,
+    0x113B7, 0x113C0,
+    0x113C2, 0x113C2,
+    0x113C5, 0x113C5,
+    0x113C7, 0x113CA,
+    0x113CC, 0x113CD,
+    0x113D1, 0x113D1,
+    0x113D3, 0x113D3,
     0x11400, 0x11441,
     0x11443, 0x11445,
     0x11447, 0x1144A,
@@ -567,6 +588,8 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
     0x11A50, 0x11A97,
     0x11A9D, 0x11A9D,
     0x11AB0, 0x11AF8,
+    0x11B60, 0x11B67,
+    0x11BC0, 0x11BE0,
     0x11C00, 0x11C08,
     0x11C0A, 0x11C36,
     0x11C38, 0x11C3E,
@@ -588,6 +611,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
     0x11D90, 0x11D91,
     0x11D93, 0x11D96,
     0x11D98, 0x11D98,
+    0x11DB0, 0x11DDB,
     0x11EE0, 0x11EF6,
     0x11F00, 0x11F10,
     0x11F12, 0x11F3A,
@@ -599,7 +623,9 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
     0x12F90, 0x12FF0,
     0x13000, 0x1342F,
     0x13441, 0x13446,
+    0x13460, 0x143FA,
     0x14400, 0x14646,
+    0x16100, 0x1612E,
     0x16800, 0x16A38,
     0x16A40, 0x16A5E,
     0x16A70, 0x16ABE,
@@ -608,16 +634,19 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
     0x16B40, 0x16B43,
     0x16B63, 0x16B77,
     0x16B7D, 0x16B8F,
+    0x16D40, 0x16D6C,
     0x16E40, 0x16E7F,
+    0x16EA0, 0x16EB8,
+    0x16EBB, 0x16ED3,
     0x16F00, 0x16F4A,
     0x16F4F, 0x16F87,
     0x16F8F, 0x16F9F,
     0x16FE0, 0x16FE1,
     0x16FE3, 0x16FE3,
-    0x16FF0, 0x16FF1,
-    0x17000, 0x187F7,
-    0x18800, 0x18CD5,
-    0x18D00, 0x18D08,
+    0x16FF0, 0x16FF6,
+    0x17000, 0x18CD5,
+    0x18CFF, 0x18D1E,
+    0x18D80, 0x18DF2,
     0x1AFF0, 0x1AFF3,
     0x1AFF5, 0x1AFFB,
     0x1AFFD, 0x1AFFE,
@@ -677,6 +706,11 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
     0x1E290, 0x1E2AD,
     0x1E2C0, 0x1E2EB,
     0x1E4D0, 0x1E4EB,
+    0x1E5D0, 0x1E5ED,
+    0x1E5F0, 0x1E5F0,
+    0x1E6C0, 0x1E6DE,
+    0x1E6E0, 0x1E6F5,
+    0x1E6FE, 0x1E6FF,
     0x1E7E0, 0x1E7E6,
     0x1E7E8, 0x1E7EB,
     0x1E7ED, 0x1E7EE,
@@ -722,16 +756,16 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
     0x1F150, 0x1F169,
     0x1F170, 0x1F189,
     0x20000, 0x2A6DF,
-    0x2A700, 0x2B739,
-    0x2B740, 0x2B81D,
-    0x2B820, 0x2CEA1,
+    0x2A700, 0x2B81D,
+    0x2B820, 0x2CEAD,
     0x2CEB0, 0x2EBE0,
+    0x2EBF0, 0x2EE5D,
     0x2F800, 0x2FA1D,
     0x30000, 0x3134A,
-    0x31350, 0x323AF,
+    0x31350, 0x33479,
 };
 
-#define UNICODE_ALNUM_CODEPOINTS_LENGTH 1528
+#define UNICODE_ALNUM_CODEPOINTS_LENGTH 1598
 static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEPOINTS_LENGTH] = {
     0x100, 0x2C1,
     0x2C6, 0x2D1,
@@ -739,7 +773,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0x2EC, 0x2EC,
     0x2EE, 0x2EE,
     0x345, 0x345,
-    0x370, 0x374,
+    0x363, 0x374,
     0x376, 0x377,
     0x37A, 0x37D,
     0x37F, 0x37F,
@@ -778,7 +812,8 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0x840, 0x858,
     0x860, 0x86A,
     0x870, 0x887,
-    0x889, 0x88E,
+    0x889, 0x88F,
+    0x897, 0x897,
     0x8A0, 0x8C9,
     0x8D4, 0x8DF,
     0x8E3, 0x8E9,
@@ -872,7 +907,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0xC4A, 0xC4C,
     0xC55, 0xC56,
     0xC58, 0xC5A,
-    0xC5D, 0xC5D,
+    0xC5C, 0xC5D,
     0xC60, 0xC63,
     0xC66, 0xC6F,
     0xC80, 0xC83,
@@ -885,7 +920,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0xCC6, 0xCC8,
     0xCCA, 0xCCC,
     0xCD5, 0xCD6,
-    0xCDD, 0xCDE,
+    0xCDC, 0xCDE,
     0xCE0, 0xCE3,
     0xCE6, 0xCEF,
     0xCF1, 0xCF3,
@@ -1007,7 +1042,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0x1C00, 0x1C36,
     0x1C40, 0x1C49,
     0x1C4D, 0x1C7D,
-    0x1C80, 0x1C88,
+    0x1C80, 0x1C8A,
     0x1C90, 0x1CBA,
     0x1CBD, 0x1CBF,
     0x1CE9, 0x1CEC,
@@ -1015,7 +1050,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0x1CF5, 0x1CF6,
     0x1CFA, 0x1CFA,
     0x1D00, 0x1DBF,
-    0x1DE7, 0x1DF4,
+    0x1DD3, 0x1DF4,
     0x1E00, 0x1F15,
     0x1F18, 0x1F1D,
     0x1F20, 0x1F45,
@@ -1094,11 +1129,8 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0xA67F, 0xA6EF,
     0xA717, 0xA71F,
     0xA722, 0xA788,
-    0xA78B, 0xA7CA,
-    0xA7D0, 0xA7D1,
-    0xA7D3, 0xA7D3,
-    0xA7D5, 0xA7D9,
-    0xA7F2, 0xA805,
+    0xA78B, 0xA7DC,
+    0xA7F1, 0xA805,
     0xA807, 0xA827,
     0xA840, 0xA873,
     0xA880, 0xA8C3,
@@ -1191,6 +1223,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0x105A3, 0x105B1,
     0x105B3, 0x105B9,
     0x105BB, 0x105BC,
+    0x105C0, 0x105F3,
     0x10600, 0x10736,
     0x10740, 0x10755,
     0x10760, 0x10767,
@@ -1209,6 +1242,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0x108F4, 0x108F5,
     0x10900, 0x10915,
     0x10920, 0x10939,
+    0x10940, 0x10959,
     0x10980, 0x109B7,
     0x109BE, 0x109BF,
     0x10A00, 0x10A03,
@@ -1229,9 +1263,14 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0x10CC0, 0x10CF2,
     0x10D00, 0x10D27,
     0x10D30, 0x10D39,
+    0x10D40, 0x10D65,
+    0x10D69, 0x10D69,
+    0x10D6F, 0x10D85,
     0x10E80, 0x10EA9,
     0x10EAB, 0x10EAC,
     0x10EB0, 0x10EB1,
+    0x10EC2, 0x10EC7,
+    0x10EFA, 0x10EFC,
     0x10F00, 0x10F1C,
     0x10F27, 0x10F27,
     0x10F30, 0x10F45,
@@ -1278,6 +1317,17 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0x11350, 0x11350,
     0x11357, 0x11357,
     0x1135D, 0x11363,
+    0x11380, 0x11389,
+    0x1138B, 0x1138B,
+    0x1138E, 0x1138E,
+    0x11390, 0x113B5,
+    0x113B7, 0x113C0,
+    0x113C2, 0x113C2,
+    0x113C5, 0x113C5,
+    0x113C7, 0x113CA,
+    0x113CC, 0x113CD,
+    0x113D1, 0x113D1,
+    0x113D3, 0x113D3,
     0x11400, 0x11441,
     0x11443, 0x11445,
     0x11447, 0x1144A,
@@ -1297,6 +1347,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0x11680, 0x116B5,
     0x116B8, 0x116B8,
     0x116C0, 0x116C9,
+    0x116D0, 0x116E3,
     0x11700, 0x1171A,
     0x1171D, 0x1172A,
     0x11730, 0x11739,
@@ -1322,6 +1373,9 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0x11A50, 0x11A97,
     0x11A9D, 0x11A9D,
     0x11AB0, 0x11AF8,
+    0x11B60, 0x11B67,
+    0x11BC0, 0x11BE0,
+    0x11BF0, 0x11BF9,
     0x11C00, 0x11C08,
     0x11C0A, 0x11C36,
     0x11C38, 0x11C3E,
@@ -1346,6 +1400,8 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0x11D93, 0x11D96,
     0x11D98, 0x11D98,
     0x11DA0, 0x11DA9,
+    0x11DB0, 0x11DDB,
+    0x11DE0, 0x11DE9,
     0x11EE0, 0x11EF6,
     0x11F00, 0x11F10,
     0x11F12, 0x11F3A,
@@ -1358,7 +1414,10 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0x12F90, 0x12FF0,
     0x13000, 0x1342F,
     0x13441, 0x13446,
+    0x13460, 0x143FA,
     0x14400, 0x14646,
+    0x16100, 0x1612E,
+    0x16130, 0x16139,
     0x16800, 0x16A38,
     0x16A40, 0x16A5E,
     0x16A60, 0x16A69,
@@ -1370,16 +1429,20 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0x16B50, 0x16B59,
     0x16B63, 0x16B77,
     0x16B7D, 0x16B8F,
+    0x16D40, 0x16D6C,
+    0x16D70, 0x16D79,
     0x16E40, 0x16E7F,
+    0x16EA0, 0x16EB8,
+    0x16EBB, 0x16ED3,
     0x16F00, 0x16F4A,
     0x16F4F, 0x16F87,
     0x16F8F, 0x16F9F,
     0x16FE0, 0x16FE1,
     0x16FE3, 0x16FE3,
-    0x16FF0, 0x16FF1,
-    0x17000, 0x187F7,
-    0x18800, 0x18CD5,
-    0x18D00, 0x18D08,
+    0x16FF0, 0x16FF6,
+    0x17000, 0x18CD5,
+    0x18CFF, 0x18D1E,
+    0x18D80, 0x18DF2,
     0x1AFF0, 0x1AFF3,
     0x1AFF5, 0x1AFFB,
     0x1AFFD, 0x1AFFE,
@@ -1394,6 +1457,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0x1BC80, 0x1BC88,
     0x1BC90, 0x1BC99,
     0x1BC9E, 0x1BC9E,
+    0x1CCF0, 0x1CCF9,
     0x1D400, 0x1D454,
     0x1D456, 0x1D49C,
     0x1D49E, 0x1D49F,
@@ -1443,6 +1507,11 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0x1E2F0, 0x1E2F9,
     0x1E4D0, 0x1E4EB,
     0x1E4F0, 0x1E4F9,
+    0x1E5D0, 0x1E5ED,
+    0x1E5F0, 0x1E5FA,
+    0x1E6C0, 0x1E6DE,
+    0x1E6E0, 0x1E6F5,
+    0x1E6FE, 0x1E6FF,
     0x1E7E0, 0x1E7E6,
     0x1E7E8, 0x1E7EB,
     0x1E7ED, 0x1E7EE,
@@ -1490,16 +1559,16 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
     0x1F170, 0x1F189,
     0x1FBF0, 0x1FBF9,
     0x20000, 0x2A6DF,
-    0x2A700, 0x2B739,
-    0x2B740, 0x2B81D,
-    0x2B820, 0x2CEA1,
+    0x2A700, 0x2B81D,
+    0x2B820, 0x2CEAD,
     0x2CEB0, 0x2EBE0,
+    0x2EBF0, 0x2EE5D,
     0x2F800, 0x2FA1D,
     0x30000, 0x3134A,
-    0x31350, 0x323AF,
+    0x31350, 0x33479,
 };
 
-#define UNICODE_ISUPPER_CODEPOINTS_LENGTH 1302
+#define UNICODE_ISUPPER_CODEPOINTS_LENGTH 1320
 static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_CODEPOINTS_LENGTH] = {
     0x100, 0x100,
     0x102, 0x102,
@@ -1774,6 +1843,7 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
     0x10C7, 0x10C7,
     0x10CD, 0x10CD,
     0x13A0, 0x13F5,
+    0x1C89, 0x1C89,
     0x1C90, 0x1CBA,
     0x1CBD, 0x1CBF,
     0x1E00, 0x1E00,
@@ -2103,9 +2173,15 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
     0xA7C2, 0xA7C2,
     0xA7C4, 0xA7C7,
     0xA7C9, 0xA7C9,
+    0xA7CB, 0xA7CC,
+    0xA7CE, 0xA7CE,
     0xA7D0, 0xA7D0,
+    0xA7D2, 0xA7D2,
+    0xA7D4, 0xA7D4,
     0xA7D6, 0xA7D6,
     0xA7D8, 0xA7D8,
+    0xA7DA, 0xA7DA,
+    0xA7DC, 0xA7DC,
     0xA7F5, 0xA7F5,
     0xFF21, 0xFF3A,
     0x10400, 0x10427,
@@ -2115,8 +2191,10 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
     0x1058C, 0x10592,
     0x10594, 0x10595,
     0x10C80, 0x10CB2,
+    0x10D50, 0x10D65,
     0x118A0, 0x118BF,
     0x16E40, 0x16E5F,
+    0x16EA0, 0x16EB8,
     0x1D400, 0x1D419,
     0x1D434, 0x1D44D,
     0x1D468, 0x1D481,
@@ -2304,6 +2382,10 @@ pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
  */
 size_t
 pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
+    if (n == 0) {
+        return 0;
+    }
+
     if (*b < 0x80) {
         return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
     }
@@ -2324,6 +2406,10 @@ pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
  */
 size_t
 pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
+    if (n == 0) {
+        return 0;
+    }
+
     if (*b < 0x80) {
         return (pm_encoding_unicode_table[*b] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
     }
@@ -2344,6 +2430,10 @@ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
  */
 bool
 pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
+    if (n == 0) {
+        return 0;
+    }
+
     if (*b < 0x80) {
         return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
     }
@@ -2362,7 +2452,8 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
 
 static pm_unicode_codepoint_t
 pm_cesu_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
-    if (b[0] < 0x80) {
+
+    if ((n > 0) && (b[0] < 0x80)) {
         *width = 1;
         return (pm_unicode_codepoint_t) b[0];
     }
@@ -2401,6 +2492,10 @@ pm_cesu_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
 
 static size_t
 pm_encoding_cesu_8_char_width(const uint8_t *b, ptrdiff_t n) {
+    if (n == 0) {
+        return 0;
+    }
+
     size_t width;
     pm_cesu_8_codepoint(b, n, &width);
     return width;
@@ -2408,6 +2503,10 @@ pm_encoding_cesu_8_char_width(const uint8_t *b, ptrdiff_t n) {
 
 static size_t
 pm_encoding_cesu_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
+    if (n == 0) {
+        return 0;
+    }
+
     if (*b < 0x80) {
         return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
     }
@@ -2424,6 +2523,10 @@ pm_encoding_cesu_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
 
 static size_t
 pm_encoding_cesu_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
+    if (n == 0) {
+        return 0;
+    }
+
     if (*b < 0x80) {
         return (pm_encoding_unicode_table[*b] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
     }
@@ -2440,6 +2543,10 @@ pm_encoding_cesu_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
 
 static bool
 pm_encoding_cesu_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
+    if (n == 0) {
+        return 0;
+    }
+
     if (*b < 0x80) {
         return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
     }
@@ -3855,14 +3962,14 @@ static const uint8_t pm_encoding_windows_874_table[256] = {
 };
 
 #define PRISM_ENCODING_TABLE(name) \
-    static size_t pm_encoding_ ##name ## _alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {           \
-        return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT);           \
+    static size_t pm_encoding_ ##name ## _alpha_char(const uint8_t *b, ptrdiff_t n) {           \
+        return (n <= 0) ? 0 : ((pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) != 0 ? 1 : 0); \
     }                                                                                                         \
-    static size_t pm_encoding_ ##name ## _alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {           \
-        return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
+    static size_t pm_encoding_ ##name ## _alnum_char(const uint8_t *b, ptrdiff_t n) {           \
+        return (n <= 0) ? 0 : ((pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) != 0 ? 1 : 0); \
     }                                                                                                         \
-    static bool pm_encoding_ ##name ## _isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {           \
-        return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT);            \
+    static bool pm_encoding_ ##name ## _isupper_char(const uint8_t *b, ptrdiff_t n) {           \
+        return (n <= 0) ? false : ((pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT) != 0); \
     }
 
 PRISM_ENCODING_TABLE(cp850)
@@ -3931,8 +4038,8 @@ PRISM_ENCODING_TABLE(windows_874)
  * means that if the top bit is not set, the character is 1 byte long.
  */
 static size_t
-pm_encoding_ascii_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
-    return *b < 0x80 ? 1 : 0;
+pm_encoding_ascii_char_width(const uint8_t *b, ptrdiff_t n) {
+    return (n <= 0 || *b >= 0x80) ? 0 : 1;
 }
 
 /**
@@ -3940,8 +4047,8 @@ pm_encoding_ascii_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
  * alphabetical character.
  */
 static size_t
-pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
-    return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT);
+pm_encoding_ascii_alpha_char(const uint8_t *b, ptrdiff_t n) {
+    return (n <= 0) ? 0 : (size_t) (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT);
 }
 
 /**
@@ -3951,7 +4058,7 @@ pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
  */
 static size_t
 pm_encoding_ascii_alpha_char_7bit(const uint8_t *b, ptrdiff_t n) {
-    return (*b < 0x80) ? pm_encoding_ascii_alpha_char(b, n) : 0;
+    return (n <= 0 || *b >= 0x80) ? 0 : pm_encoding_ascii_alpha_char(b, n);
 }
 
 /**
@@ -3959,8 +4066,8 @@ pm_encoding_ascii_alpha_char_7bit(const uint8_t *b, ptrdiff_t n) {
  * alphanumeric character.
  */
 static size_t
-pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
-    return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
+pm_encoding_ascii_alnum_char(const uint8_t *b, ptrdiff_t n) {
+    return (n <= 0 || (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) == 0) ? 0 : 1;
 }
 
 /**
@@ -3970,7 +4077,7 @@ pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
  */
 static size_t
 pm_encoding_ascii_alnum_char_7bit(const uint8_t *b, ptrdiff_t n) {
-    return (*b < 0x80) ? pm_encoding_ascii_alnum_char(b, n) : 0;
+    return (n <= 0 || *b >= 0x80) ? 0 : pm_encoding_ascii_alnum_char(b, n);
 }
 
 /**
@@ -3978,8 +4085,8 @@ pm_encoding_ascii_alnum_char_7bit(const uint8_t *b, ptrdiff_t n) {
  * character.
  */
 static bool
-pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
-    return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT);
+pm_encoding_ascii_isupper_char(const uint8_t *b, ptrdiff_t n) {
+    return (n <= 0) ? false : ((pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) != 0);
 }
 
 /**
@@ -3987,7 +4094,7 @@ pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_
  * matter what the codepoint, so this function is shared between them.
  */
 static size_t
-pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
+pm_encoding_single_char_width(PRISM_UNUSED const uint8_t *b, PRISM_UNUSED ptrdiff_t n) {
     return 1;
 }
 
@@ -3998,7 +4105,7 @@ pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATT
 static size_t
 pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
     // These are the single byte characters.
-    if (*b < 0x80) {
+    if ((n > 0) && (*b < 0x80)) {
         return 1;
     }
 
@@ -4042,6 +4149,9 @@ pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
  */
 static size_t
 pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
+    if (n == 0) {
+        return 0;
+    }
     // These are the single byte characters.
     if (b[0] < 0x80 || (b[0] >= 0xA1 && b[0] <= 0xDF)) {
         return 1;
@@ -4105,7 +4215,7 @@ pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
  */
 static bool
 pm_encoding_ascii_isupper_char_7bit(const uint8_t *b, ptrdiff_t n) {
-    return (*b < 0x80) && pm_encoding_ascii_isupper_char(b, n);
+    return (n <= 0 || *b >= 0x80) ? false : pm_encoding_ascii_isupper_char(b, n);
 }
 
 /**
@@ -4115,7 +4225,7 @@ pm_encoding_ascii_isupper_char_7bit(const uint8_t *b, ptrdiff_t n) {
 static size_t
 pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
     // These are the single byte characters.
-    if (*b < 0x80) {
+    if ((n > 0) && (*b < 0x80)) {
         return 1;
     }
 
@@ -4134,7 +4244,7 @@ pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
 static size_t
 pm_encoding_cp949_char_width(const uint8_t *b, ptrdiff_t n) {
     // These are the single byte characters
-    if (*b <= 0x80) {
+    if ((n > 0) && (*b <= 0x80)) {
         return 1;
     }
 
@@ -4153,7 +4263,7 @@ pm_encoding_cp949_char_width(const uint8_t *b, ptrdiff_t n) {
 static size_t
 pm_encoding_emacs_mule_char_width(const uint8_t *b, ptrdiff_t n) {
     // These are the 1 byte characters.
-    if (*b < 0x80) {
+    if ((n > 0) && (*b < 0x80)) {
         return 1;
     }
 
@@ -4196,7 +4306,7 @@ pm_encoding_emacs_mule_char_width(const uint8_t *b, ptrdiff_t n) {
 static size_t
 pm_encoding_euc_kr_char_width(const uint8_t *b, ptrdiff_t n) {
     // These are the single byte characters.
-    if (*b < 0x80) {
+    if ((n > 0) && (*b < 0x80)) {
         return 1;
     }
 
@@ -4215,7 +4325,7 @@ pm_encoding_euc_kr_char_width(const uint8_t *b, ptrdiff_t n) {
 static size_t
 pm_encoding_euc_tw_char_width(const uint8_t *b, ptrdiff_t n) {
     // These are the single byte characters.
-    if (*b < 0x80) {
+    if ((n > 0) && (*b < 0x80)) {
         return 1;
     }
 
@@ -4239,7 +4349,7 @@ pm_encoding_euc_tw_char_width(const uint8_t *b, ptrdiff_t n) {
 static size_t
 pm_encoding_gb18030_char_width(const uint8_t *b, ptrdiff_t n) {
     // These are the 1 byte characters.
-    if (*b < 0x80) {
+    if ((n > 0) && (*b < 0x80)) {
         return 1;
     }
 
@@ -4263,7 +4373,7 @@ pm_encoding_gb18030_char_width(const uint8_t *b, ptrdiff_t n) {
 static size_t
 pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
     // These are the single byte characters.
-    if (*b <= 0x80) {
+    if ((n > 0) && (*b <= 0x80)) {
         return 1;
     }
 
diff --git a/prism/excludes.h b/prism/excludes.h
new file mode 100644
index 0000000000..8600622f63
--- /dev/null
+++ b/prism/excludes.h
@@ -0,0 +1,29 @@
+/**
+ * @file excludes.h
+ *
+ * A header file that defines macros to exclude certain features of the prism
+ * library. This is useful for reducing the size of the library when certain
+ * features are not needed.
+ */
+#ifndef PRISM_EXCLUDES_H
+#define PRISM_EXCLUDES_H
+
+/**
+ * If PRISM_BUILD_MINIMAL is defined, then we're going to define every possible
+ * switch that will turn off certain features of prism.
+ */
+#ifdef PRISM_BUILD_MINIMAL
+    /** Exclude the serialization API. */
+    #define PRISM_EXCLUDE_SERIALIZATION
+
+    /** Exclude the JSON serialization API. */
+    #define PRISM_EXCLUDE_JSON
+
+    /** Exclude the prettyprint API. */
+    #define PRISM_EXCLUDE_PRETTYPRINT
+
+    /** Exclude the full set of encodings, keeping only the minimal set. */
+    #define PRISM_ENCODING_EXCLUDE_FULL
+#endif
+
+#endif
diff --git a/prism/extension.c b/prism/extension.c
index 1533ca7bb3..27df8dac50 100644
--- a/prism/extension.c
+++ b/prism/extension.c
@@ -4,6 +4,8 @@
 #include <ruby/win32.h>
 #endif
 
+#include <errno.h>
+
 // NOTE: this file should contain only bindings. All non-trivial logic should be
 // in libprism so it can be shared its the various callers.
 
@@ -25,6 +27,7 @@ VALUE rb_cPrismLexResult;
 VALUE rb_cPrismParseLexResult;
 VALUE rb_cPrismStringQuery;
 VALUE rb_cPrismScope;
+VALUE rb_cPrismCurrentVersionError;
 
 VALUE rb_cPrismDebugEncoding;
 
@@ -63,18 +66,6 @@ check_string(VALUE value) {
     return RSTRING_PTR(value);
 }
 
-/**
- * Load the contents and size of the given string into the given pm_string_t.
- */
-static void
-input_load_string(pm_string_t *input, VALUE string) {
-    // Check if the string is a string. If it's not, then raise a type error.
-    if (!RB_TYPE_P(string, T_STRING)) {
-        rb_raise(rb_eTypeError, "wrong argument type %" PRIsVALUE " (expected String)", rb_obj_class(string));
-    }
-
-    pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
-}
 
 /******************************************************************************/
 /* Building C options from Ruby options                                       */
@@ -147,10 +138,8 @@ build_options_scopes(pm_options_t *options, VALUE scopes) {
 
         // Initialize the scope array.
         size_t locals_count = RARRAY_LEN(locals);
-        pm_options_scope_t *options_scope = &options->scopes[scope_index];
-        if (!pm_options_scope_init(options_scope, locals_count)) {
-            rb_raise(rb_eNoMemError, "failed to allocate memory");
-        }
+        pm_options_scope_t *options_scope = pm_options_scope_mut(options, scope_index);
+        pm_options_scope_init(options_scope, locals_count);
 
         // Iterate over the locals and add them to the scope.
         for (size_t local_index = 0; local_index < locals_count; local_index++) {
@@ -163,7 +152,7 @@ build_options_scopes(pm_options_t *options, VALUE scopes) {
             }
 
             // Add the local to the scope.
-            pm_string_t *scope_local = &options_scope->locals[local_index];
+            pm_string_t *scope_local = pm_options_scope_local_mut(options_scope, local_index);
             const char *name = rb_id2name(SYM2ID(local));
             pm_string_constant_init(scope_local, name, strlen(name));
         }
@@ -199,7 +188,21 @@ build_options_i(VALUE key, VALUE value, VALUE argument) {
         if (!NIL_P(value)) {
             const char *version = check_string(value);
 
-            if (!pm_options_version_set(options, version, RSTRING_LEN(value))) {
+            if (RSTRING_LEN(value) == 7 && strncmp(version, "current", 7) == 0) {
+                if (!pm_options_version_set(options, ruby_version, 3)) {
+                    rb_exc_raise(rb_exc_new_cstr(rb_cPrismCurrentVersionError, ruby_version));
+                }
+            } else if (RSTRING_LEN(value) == 7 && strncmp(version, "nearest", 7) == 0) {
+                if (!pm_options_version_set(options, ruby_version, 3)) {
+                    // Prism doesn't know this specific version. Is it lower?
+                    if (ruby_version[0] < '3' || (ruby_version[0] == '3' && ruby_version[2] < '3')) {
+                        pm_options_version_set_lowest(options);
+                    } else {
+                        // Must be higher.
+                        pm_options_version_set_highest(options);
+                    }
+                }
+            } else if (!pm_options_version_set(options, version, RSTRING_LEN(value))) {
                 rb_raise(rb_eArgError, "invalid version: %" PRIsVALUE, value);
             }
         }
@@ -263,7 +266,7 @@ build_options(VALUE argument) {
  */
 static void
 extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
-    options->line = 1; // default
+    pm_options_line_set(options, 1); /* default */
 
     if (!NIL_P(keywords)) {
         struct build_options_data data = { .options = options, .keywords = keywords };
@@ -291,36 +294,46 @@ extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
 /**
  * Read options for methods that look like (source, **options).
  */
-static void
-string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
+static VALUE
+string_options(int argc, VALUE *argv, pm_options_t *options) {
     VALUE string;
     VALUE keywords;
     rb_scan_args(argc, argv, "1:", &string, &keywords);
 
+    if (!RB_TYPE_P(string, T_STRING)) {
+        pm_options_free(options);
+        rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string));
+    }
+
     extract_options(options, Qnil, keywords);
-    input_load_string(input, string);
+    return string;
 }
 
 /**
  * Read options for methods that look like (filepath, **options).
  */
-static void
-file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, VALUE *encoded_filepath) {
+static pm_source_t *
+file_options(int argc, VALUE *argv, pm_options_t *options, VALUE *encoded_filepath) {
     VALUE filepath;
     VALUE keywords;
     rb_scan_args(argc, argv, "1:", &filepath, &keywords);
 
-    Check_Type(filepath, T_STRING);
+    if (!RB_TYPE_P(filepath, T_STRING)) {
+        pm_options_free(options);
+        rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
+    }
+
     *encoded_filepath = rb_str_encode_ospath(filepath);
     extract_options(options, *encoded_filepath, keywords);
 
-    const char *source = (const char *) pm_string_source(&options->filepath);
-    pm_string_init_result_t result;
+    const char *source = (const char *) pm_string_source(pm_options_filepath(options));
+    pm_source_init_result_t result;
+    pm_source_t *pm_src = pm_source_file_new(source, &result);
 
-    switch (result = pm_string_file_init(input, source)) {
-        case PM_STRING_INIT_SUCCESS:
+    switch (result) {
+        case PM_SOURCE_INIT_SUCCESS:
             break;
-        case PM_STRING_INIT_ERROR_GENERIC: {
+        case PM_SOURCE_INIT_ERROR_GENERIC: {
             pm_options_free(options);
 
 #ifdef _WIN32
@@ -332,7 +345,7 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, V
             rb_syserr_fail(e, source);
             break;
         }
-        case PM_STRING_INIT_ERROR_DIRECTORY:
+        case PM_SOURCE_INIT_ERROR_DIRECTORY:
             pm_options_free(options);
             rb_syserr_fail(EISDIR, source);
             break;
@@ -341,6 +354,8 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, V
             rb_raise(rb_eRuntimeError, "Unknown error (%d) initializing file: %s", result, source);
             break;
     }
+
+    return pm_src;
 }
 
 #ifndef PRISM_EXCLUDE_SERIALIZATION
@@ -353,77 +368,82 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, V
  * Dump the AST corresponding to the given input to a string.
  */
 static VALUE
-dump_input(pm_string_t *input, const pm_options_t *options) {
-    pm_buffer_t buffer;
-    if (!pm_buffer_init(&buffer)) {
+dump_input(const uint8_t *input, size_t input_length, const pm_options_t *options) {
+    pm_buffer_t *buffer = pm_buffer_new();
+    if (!buffer) {
         rb_raise(rb_eNoMemError, "failed to allocate memory");
     }
 
-    pm_parser_t parser;
-    pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
+    pm_arena_t *arena = pm_arena_new();
+    pm_parser_t *parser = pm_parser_new(arena, input, input_length, options);
 
-    pm_node_t *node = pm_parse(&parser);
-    pm_serialize(&parser, node, &buffer);
+    pm_node_t *node = pm_parse(parser);
+    pm_serialize(parser, node, buffer);
 
-    VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
-    pm_node_destroy(&parser, node);
-    pm_buffer_free(&buffer);
-    pm_parser_free(&parser);
+    VALUE result = rb_str_new(pm_buffer_value(buffer), pm_buffer_length(buffer));
+    pm_buffer_free(buffer);
+    pm_parser_free(parser);
+    pm_arena_free(arena);
 
     return result;
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::dump(source, **options) -> String
+ *   dump(source, **options) -> String
  *
  * Dump the AST corresponding to the given string to a string. For supported
- * options, see Prism::parse.
+ * options, see Prism.parse.
  */
 static VALUE
 dump(int argc, VALUE *argv, VALUE self) {
-    pm_string_t input;
-    pm_options_t options = { 0 };
-    string_options(argc, argv, &input, &options);
+    pm_options_t *options = pm_options_new();
+    VALUE string = string_options(argc, argv, options);
+
+    const uint8_t *source = (const uint8_t *) RSTRING_PTR(string);
+    size_t length = RSTRING_LEN(string);
 
 #ifdef PRISM_BUILD_DEBUG
-    size_t length = pm_string_length(&input);
     char* dup = xmalloc(length);
-    memcpy(dup, pm_string_source(&input), length);
-    pm_string_constant_init(&input, dup, length);
+    memcpy(dup, source, length);
+    source = (const uint8_t *) dup;
 #endif
 
-    VALUE value = dump_input(&input, &options);
-    if (options.freeze) rb_obj_freeze(value);
+    VALUE value = dump_input(source, length, options);
+    if (pm_options_freeze(options)) rb_obj_freeze(value);
 
 #ifdef PRISM_BUILD_DEBUG
+#ifdef xfree_sized
+    xfree_sized(dup, length);
+#else
     xfree(dup);
 #endif
+#endif
 
-    pm_string_free(&input);
-    pm_options_free(&options);
+    pm_options_free(options);
 
     return value;
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::dump_file(filepath, **options) -> String
+ *   dump_file(filepath, **options) -> String
  *
  * Dump the AST corresponding to the given file to a string. For supported
- * options, see Prism::parse.
+ * options, see Prism.parse.
  */
 static VALUE
 dump_file(int argc, VALUE *argv, VALUE self) {
-    pm_string_t input;
-    pm_options_t options = { 0 };
+    pm_options_t *options = pm_options_new();
 
     VALUE encoded_filepath;
-    file_options(argc, argv, &input, &options, &encoded_filepath);
+    pm_source_t *src = file_options(argc, argv, options, &encoded_filepath);
 
-    VALUE value = dump_input(&input, &options);
-    pm_string_free(&input);
-    pm_options_free(&options);
-
+    VALUE value = dump_input(pm_source_source(src), pm_source_length(src), options);
+    if (pm_options_freeze(options)) rb_obj_freeze(value); // honor freeze: the same way dump() does
+    pm_source_free(src);
+    pm_options_free(options);
     return value;
 }
@@ -449,42 +469,49 @@ rb_class_new_instance_freeze(int argc, const VALUE *argv, VALUE klass, bool free
  * Create a new Location instance from the given parser and bounds.
  */
 static inline VALUE
-parser_location(const pm_parser_t *parser, VALUE source, bool freeze, const uint8_t *start, size_t length) {
-    VALUE argv[] = { source, LONG2FIX(start - parser->start), LONG2FIX(length) };
+parser_location(VALUE source, bool freeze, uint32_t start, uint32_t length) {
+    VALUE argv[] = { source, LONG2FIX(start), LONG2FIX(length) };
     return rb_class_new_instance_freeze(3, argv, rb_cPrismLocation, freeze);
 }
 
 /**
- * Create a new Location instance from the given parser and location.
+ * Create a new Location instance from the given source and location value.
  */
-#define PARSER_LOCATION_LOC(parser, source, freeze, loc) \
-    parser_location(parser, source, freeze, loc.start, (size_t) (loc.end - loc.start))
+#define PARSER_LOCATION(source, freeze, location) \
+    parser_location((source), (freeze), (location).start, (location).length)
 
 /**
  * Build a new Comment instance from the given parser and comment.
  */
 static inline VALUE
-parser_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_comment_t *comment) {
-    VALUE argv[] = { PARSER_LOCATION_LOC(parser, source, freeze, comment->location) };
-    VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
+parser_comment(VALUE source, bool freeze, const pm_comment_t *comment) {
+    VALUE argv[] = { PARSER_LOCATION(source, freeze, pm_comment_location(comment)) };
+    VALUE type = (pm_comment_type(comment) == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
     return rb_class_new_instance_freeze(1, argv, type, freeze);
 }
 
+/** Accumulator handed to parser_comments_each through its void *data argument. */
+typedef struct {
+    VALUE comments;
+    VALUE source;
+    bool freeze;
+} parser_comments_each_data_t;
+/** Callback: build the Ruby object for one comment and append it to the array. */
+static void
+parser_comments_each(const pm_comment_t *comment, void *data) {
+    parser_comments_each_data_t *state = data;
+    rb_ary_push(state->comments, parser_comment(state->source, state->freeze, comment));
+}
+
 /**
  * Extract the comments out of the parser into an array.
  */
 static VALUE
 parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
-    VALUE comments = rb_ary_new_capa(parser->comment_list.size);
-
-    for (
-        const pm_comment_t *comment = (const pm_comment_t *) parser->comment_list.head;
-        comment != NULL;
-        comment = (const pm_comment_t *) comment->node.next
-    ) {
-        VALUE value = parser_comment(parser, source, freeze, comment);
-        rb_ary_push(comments, value);
-    }
+    VALUE comments = rb_ary_new_capa(pm_parser_comments_size(parser));
+
+    parser_comments_each_data_t each_data = { comments, source, freeze };
+    pm_parser_comments_each(parser, parser_comments_each, &each_data);
 
     if (freeze) rb_obj_freeze(comments);
     return comments;
@@ -494,28 +521,39 @@ parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
  * Build a new MagicComment instance from the given parser and magic comment.
  */
 static inline VALUE
-parser_magic_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) {
-    VALUE key_loc = parser_location(parser, source, freeze, magic_comment->key_start, magic_comment->key_length);
-    VALUE value_loc = parser_location(parser, source, freeze, magic_comment->value_start, magic_comment->value_length);
+parser_magic_comment(VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) {
+    pm_location_t key = pm_magic_comment_key(magic_comment);
+    pm_location_t value = pm_magic_comment_value(magic_comment);
+
+    VALUE key_loc = parser_location(source, freeze, key.start, key.length);
+    VALUE value_loc = parser_location(source, freeze, value.start, value.length);
+
     VALUE argv[] = { key_loc, value_loc };
     return rb_class_new_instance_freeze(2, argv, rb_cPrismMagicComment, freeze);
 }
 
+/** Accumulator handed to parser_magic_comments_each through its void *data argument. */
+typedef struct {
+    VALUE magic_comments;
+    VALUE source;
+    bool freeze;
+} parser_magic_comments_each_data_t;
+/** Callback: build the Ruby object for one magic comment and append it to the array. */
+static void
+parser_magic_comments_each(const pm_magic_comment_t *magic_comment, void *data) {
+    parser_magic_comments_each_data_t *state = data;
+    rb_ary_push(state->magic_comments, parser_magic_comment(state->source, state->freeze, magic_comment));
+}
+
 /**
  * Extract the magic comments out of the parser into an array.
  */
 static VALUE
 parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
-    VALUE magic_comments = rb_ary_new_capa(parser->magic_comment_list.size);
-
-    for (
-        const pm_magic_comment_t *magic_comment = (const pm_magic_comment_t *) parser->magic_comment_list.head;
-        magic_comment != NULL;
-        magic_comment = (const pm_magic_comment_t *) magic_comment->node.next
-    ) {
-        VALUE value = parser_magic_comment(parser, source, freeze, magic_comment);
-        rb_ary_push(magic_comments, value);
-    }
+    VALUE magic_comments = rb_ary_new_capa(pm_parser_magic_comments_size(parser));
+
+    parser_magic_comments_each_data_t each_data = { magic_comments, source, freeze };
+    pm_parser_magic_comments_each(parser, parser_magic_comments_each, &each_data);
 
     if (freeze) rb_obj_freeze(magic_comments);
     return magic_comments;
@@ -527,85 +565,109 @@ parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
  */
 static VALUE
 parser_data_loc(const pm_parser_t *parser, VALUE source, bool freeze) {
-    if (parser->data_loc.end == NULL) {
+    const pm_location_t *data_loc = pm_parser_data_loc(parser);
+
+    if (data_loc->length == 0) {
         return Qnil;
     } else {
-        return PARSER_LOCATION_LOC(parser, source, freeze, parser->data_loc);
+        return parser_location(source, freeze, data_loc->start, data_loc->length);
     }
 }
 
+/** Accumulator handed to parser_errors_each through its void *data argument. */
+typedef struct {
+    VALUE errors;
+    rb_encoding *encoding;
+    VALUE source;
+    bool freeze;
+} parser_errors_each_data_t;
+
+/**
+ * Callback for pm_parser_errors_each: build a ParseError for one diagnostic
+ * and append it to the errors array.
+ */
+static void
+parser_errors_each(const pm_diagnostic_t *diagnostic, void *data) {
+    parser_errors_each_data_t *state = data;
+
+    VALUE type = ID2SYM(rb_intern(pm_diagnostic_type(diagnostic)));
+    // The message string is frozen unconditionally, whatever state->freeze says.
+    VALUE message = rb_enc_str_new_cstr(pm_diagnostic_message(diagnostic), state->encoding);
+    rb_obj_freeze(message);
+    VALUE location = PARSER_LOCATION(state->source, state->freeze, pm_diagnostic_location(diagnostic));
+
+    // Pick the symbol name for the level; an unrecognized level raises.
+    const char *level_name = NULL;
+    pm_error_level_t error_level = pm_diagnostic_error_level(diagnostic);
+    switch (error_level) {
+        case PM_ERROR_LEVEL_SYNTAX: level_name = "syntax"; break;
+        case PM_ERROR_LEVEL_ARGUMENT: level_name = "argument"; break;
+        case PM_ERROR_LEVEL_LOAD: level_name = "load"; break;
+        default: rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error_level);
+    }
+
+    VALUE argv[] = { type, message, location, ID2SYM(rb_intern(level_name)) };
+    VALUE value = rb_class_new_instance_freeze(4, argv, rb_cPrismParseError, state->freeze);
+    rb_ary_push(state->errors, value);
+}
+
 /**
  * Extract the errors out of the parser into an array.
  */
 static VALUE
 parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) {
-    VALUE errors = rb_ary_new_capa(parser->error_list.size);
-
-    for (
-        const pm_diagnostic_t *error = (const pm_diagnostic_t *) parser->error_list.head;
-        error != NULL;
-        error = (const pm_diagnostic_t *) error->node.next
-    ) {
-        VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(error->diag_id)));
-        VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(error->message, encoding));
-        VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, error->location);
-
-        VALUE level = Qnil;
-        switch (error->level) {
-            case PM_ERROR_LEVEL_SYNTAX:
-                level = ID2SYM(rb_intern("syntax"));
-                break;
-            case PM_ERROR_LEVEL_ARGUMENT:
-                level = ID2SYM(rb_intern("argument"));
-                break;
-            case PM_ERROR_LEVEL_LOAD:
-                level = ID2SYM(rb_intern("load"));
-                break;
-            default:
-                rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error->level);
-        }
+    VALUE errors = rb_ary_new_capa(pm_parser_errors_size(parser));
 
-        VALUE argv[] = { type, message, location, level };
-        VALUE value = rb_class_new_instance_freeze(4, argv, rb_cPrismParseError, freeze);
-        rb_ary_push(errors, value);
-    }
+    parser_errors_each_data_t each_data = { errors, encoding, source, freeze };
+    pm_parser_errors_each(parser, parser_errors_each, &each_data);
 
     if (freeze) rb_obj_freeze(errors);
     return errors;
 }
 
+/** Accumulator handed to parser_warnings_each through its void *data argument. */
+typedef struct {
+    VALUE warnings;
+    rb_encoding *encoding;
+    VALUE source;
+    bool freeze;
+} parser_warnings_each_data_t;
+
+/**
+ * Callback for pm_parser_warnings_each: build a ParseWarning for one
+ * diagnostic and append it to the warnings array.
+ */
+static void
+parser_warnings_each(const pm_diagnostic_t *diagnostic, void *data) {
+    parser_warnings_each_data_t *state = data;
+
+    VALUE type = ID2SYM(rb_intern(pm_diagnostic_type(diagnostic)));
+    VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(pm_diagnostic_message(diagnostic), state->encoding));
+    VALUE location = PARSER_LOCATION(state->source, state->freeze, pm_diagnostic_location(diagnostic));
+
+    // Pick the symbol name for the level; an unrecognized level raises.
+    const char *level_name = NULL;
+    pm_warning_level_t warning_level = pm_diagnostic_warning_level(diagnostic);
+    switch (warning_level) {
+        case PM_WARNING_LEVEL_DEFAULT: level_name = "default"; break;
+        case PM_WARNING_LEVEL_VERBOSE: level_name = "verbose"; break;
+        default: rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, warning_level);
+    }
+
+    VALUE argv[] = { type, message, location, ID2SYM(rb_intern(level_name)) };
+    VALUE value = rb_class_new_instance_freeze(4, argv, rb_cPrismParseWarning, state->freeze);
+    rb_ary_push(state->warnings, value);
+}
+
 /**
  * Extract the warnings out of the parser into an array.
  */
 static VALUE
 parser_warnings(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) {
-    VALUE warnings = rb_ary_new_capa(parser->warning_list.size);
-
-    for (
-        const pm_diagnostic_t *warning = (const pm_diagnostic_t *) parser->warning_list.head;
-        warning != NULL;
-        warning = (const pm_diagnostic_t *) warning->node.next
-    ) {
-        VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(warning->diag_id)));
-        VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(warning->message, encoding));
-        VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, warning->location);
-
-        VALUE level = Qnil;
-        switch (warning->level) {
-            case PM_WARNING_LEVEL_DEFAULT:
-                level = ID2SYM(rb_intern("default"));
-                break;
-            case PM_WARNING_LEVEL_VERBOSE:
-                level = ID2SYM(rb_intern("verbose"));
-                break;
-            default:
-                rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, warning->level);
-        }
+    VALUE warnings = rb_ary_new_capa(pm_parser_warnings_size(parser));
 
-        VALUE argv[] = { type, message, location, level };
-        VALUE value = rb_class_new_instance_freeze(4, argv, rb_cPrismParseWarning, freeze);
-        rb_ary_push(warnings, value);
-    }
+    parser_warnings_each_data_t each_data = { warnings, encoding, source, freeze };
+    pm_parser_warnings_each(parser, parser_warnings_each, &each_data);
 
     if (freeze) rb_obj_freeze(warnings);
     return warnings;
@@ -623,10 +685,11 @@ parse_result_create(VALUE class, const pm_parser_t *parser, VALUE value, rb_enco
         parser_data_loc(parser, source, freeze),
         parser_errors(parser, encoding, source, freeze),
         parser_warnings(parser, encoding, source, freeze),
+        pm_parser_continuable(parser) ? Qtrue : Qfalse,
         source
     };
 
-    return rb_class_new_instance_freeze(7, result_argv, class, freeze);
+    return rb_class_new_instance_freeze(8, result_argv, class, freeze);
 }
 
 /******************************************************************************/
@@ -651,11 +714,11 @@ typedef struct {
  * onto the tokens array.
  */
 static void
-parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
-    parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
+parse_lex_token(pm_parser_t *parser, pm_token_t *token, void *data) {
+    parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) data;
 
     VALUE value = pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source, parse_lex_data->freeze);
-    VALUE yields = rb_assoc_new(value, INT2FIX(parser->lex_state));
+    VALUE yields = rb_assoc_new(value, INT2FIX(pm_parser_lex_state(parser)));
 
     if (parse_lex_data->freeze) {
         rb_obj_freeze(value);
@@ -672,8 +735,8 @@ parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
  */
 static void
 parse_lex_encoding_changed_callback(pm_parser_t *parser) {
-    parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
-    parse_lex_data->encoding = rb_enc_find(parser->encoding->name);
+    parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) pm_parser_lex_callback_data(parser);
+    parse_lex_data->encoding = rb_enc_find(pm_parser_encoding_name(parser));
 
     // Since the encoding changed, we need to go back and change the encoding of
     // the tokens that were already lexed. This is only going to end up being
@@ -718,43 +781,38 @@ parse_lex_encoding_changed_callback(pm_parser_t *parser) {
  * the nodes and tokens.
  */
 static VALUE
-parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) {
-    pm_parser_t parser;
-    pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
-    pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
+parse_lex_input(const uint8_t *input, size_t input_length, const pm_options_t *options, bool return_nodes) {
+    pm_arena_t *arena = pm_arena_new();
+    pm_parser_t *parser = pm_parser_new(arena, input, input_length, options);
+    pm_parser_encoding_changed_callback_set(parser, parse_lex_encoding_changed_callback);
 
-    VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input));
-    VALUE offsets = rb_ary_new_capa(parser.newline_list.size);
-    VALUE source = rb_funcall(rb_cPrismSource, rb_id_source_for, 3, source_string, LONG2NUM(parser.start_line), offsets);
+    VALUE source_string = rb_str_new((const char *) input, input_length);
+    VALUE offsets = rb_ary_new_capa(pm_parser_line_offsets(parser)->size);
+    VALUE source = rb_funcall(rb_cPrismSource, rb_id_source_for, 3, source_string, LONG2NUM(pm_parser_start_line(parser)), offsets);
 
     parse_lex_data_t parse_lex_data = {
         .source = source,
         .tokens = rb_ary_new(),
-        .encoding = rb_utf8_encoding(),
-        .freeze = options->freeze,
+        .encoding = rb_enc_find(pm_parser_encoding_name(parser)),
+        .freeze = pm_options_freeze(options),
     };
 
     parse_lex_data_t *data = &parse_lex_data;
-    pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
-        .data = (void *) data,
-        .callback = parse_lex_token,
-    };
+    pm_parser_lex_callback_set(parser, parse_lex_token, data);
 
-    parser.lex_callback = &lex_callback;
-    pm_node_t *node = pm_parse(&parser);
+    pm_node_t *node = pm_parse(parser);
 
-    // Here we need to update the Source object to have the correct
-    // encoding for the source string and the correct newline offsets.
-    // We do it here because we've already created the Source object and given
-    // it over to all of the tokens, and both of these are only set after pm_parse().
-    rb_encoding *encoding = rb_enc_find(parser.encoding->name);
+    /* Update the Source object with the correct encoding and line offsets,
+     * which are only available after pm_parse() completes. */
+    rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser));
     rb_enc_associate(source_string, encoding);
 
-    for (size_t index = 0; index < parser.newline_list.size; index++) {
-        rb_ary_push(offsets, ULONG2NUM(parser.newline_list.offsets[index]));
+    const pm_line_offset_list_t *line_offsets = pm_parser_line_offsets(parser);
+    for (size_t index = 0; index < line_offsets->size; index++) {
+        rb_ary_store(offsets, (long) index, ULONG2NUM(line_offsets->offsets[index]));
     }
 
-    if (options->freeze) {
+    if (pm_options_freeze(options)) {
         rb_obj_freeze(source_string);
         rb_obj_freeze(offsets);
         rb_obj_freeze(source);
@@ -764,58 +822,57 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
     VALUE result;
     if (return_nodes) {
         VALUE value = rb_ary_new_capa(2);
-        rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source, options->freeze));
+        rb_ary_push(value, pm_ast_new(parser, node, parse_lex_data.encoding, source, pm_options_freeze(options)));
         rb_ary_push(value, parse_lex_data.tokens);
-        if (options->freeze) rb_obj_freeze(value);
-        result = parse_result_create(rb_cPrismParseLexResult, &parser, value, parse_lex_data.encoding, source, options->freeze);
+        if (pm_options_freeze(options)) rb_obj_freeze(value);
+        result = parse_result_create(rb_cPrismParseLexResult, parser, value, parse_lex_data.encoding, source, pm_options_freeze(options));
     } else {
-        result = parse_result_create(rb_cPrismLexResult, &parser, parse_lex_data.tokens, parse_lex_data.encoding, source, options->freeze);
+        result = parse_result_create(rb_cPrismLexResult, parser, parse_lex_data.tokens, parse_lex_data.encoding, source, pm_options_freeze(options));
     }
 
-    pm_node_destroy(&parser, node);
-    pm_parser_free(&parser);
+    pm_parser_free(parser);
+    pm_arena_free(arena);
 
     return result;
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::lex(source, **options) -> LexResult
+ *   lex(source, **options) -> LexResult
  *
  * Return a LexResult instance that contains an array of Token instances
- * corresponding to the given string. For supported options, see Prism::parse.
+ * corresponding to the given string. For supported options, see Prism.parse.
  */
 static VALUE
 lex(int argc, VALUE *argv, VALUE self) {
-    pm_string_t input;
-    pm_options_t options = { 0 };
-    string_options(argc, argv, &input, &options);
+    pm_options_t *options = pm_options_new();
+    VALUE string = string_options(argc, argv, options);
 
-    VALUE result = parse_lex_input(&input, &options, false);
-    pm_string_free(&input);
-    pm_options_free(&options);
+    VALUE result = parse_lex_input((const uint8_t *) RSTRING_PTR(string), RSTRING_LEN(string), options, false);
+    pm_options_free(options);
 
     return result;
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::lex_file(filepath, **options) -> LexResult
+ *   lex_file(filepath, **options) -> LexResult
  *
  * Return a LexResult instance that contains an array of Token instances
- * corresponding to the given file. For supported options, see Prism::parse.
+ * corresponding to the given file. For supported options, see Prism.parse.
  */
 static VALUE
 lex_file(int argc, VALUE *argv, VALUE self) {
-    pm_string_t input;
-    pm_options_t options = { 0 };
+    pm_options_t *options = pm_options_new();
 
     VALUE encoded_filepath;
-    file_options(argc, argv, &input, &options, &encoded_filepath);
+    pm_source_t *src = file_options(argc, argv, options, &encoded_filepath);
 
-    VALUE value = parse_lex_input(&input, &options, false);
-    pm_string_free(&input);
-    pm_options_free(&options);
+    VALUE value = parse_lex_input(pm_source_source(src), pm_source_length(src), options, false);
+    pm_source_free(src);
+    pm_options_free(options);
 
     return value;
 }
@@ -828,30 +885,32 @@ lex_file(int argc, VALUE *argv, VALUE self) {
  * Parse the given input and return a ParseResult instance.
  */
 static VALUE
-parse_input(pm_string_t *input, const pm_options_t *options) {
-    pm_parser_t parser;
-    pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
+parse_input(const uint8_t *input, size_t input_length, const pm_options_t *options) {
+    pm_arena_t *arena = pm_arena_new();
+    pm_parser_t *parser = pm_parser_new(arena, input, input_length, options);
 
-    pm_node_t *node = pm_parse(&parser);
-    rb_encoding *encoding = rb_enc_find(parser.encoding->name);
+    pm_node_t *node = pm_parse(parser);
+    rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser));
 
-    VALUE source = pm_source_new(&parser, encoding, options->freeze);
-    VALUE value = pm_ast_new(&parser, node, encoding, source, options->freeze);
-    VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, options->freeze);
+    bool freeze = pm_options_freeze(options);
+    VALUE source = pm_source_new(parser, encoding, freeze);
+    VALUE value = pm_ast_new(parser, node, encoding, source, freeze);
+    VALUE result = parse_result_create(rb_cPrismParseResult, parser, value, encoding, source, freeze);
 
-    if (options->freeze) {
+    if (freeze) {
         rb_obj_freeze(source);
     }
 
-    pm_node_destroy(&parser, node);
-    pm_parser_free(&parser);
+    pm_parser_free(parser);
+    pm_arena_free(arena);
 
     return result;
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::parse(source, **options) -> ParseResult
+ *   parse(source, **options) -> ParseResult
  *
  * Parse the given string and return a ParseResult instance. The options that
  * are supported are:
@@ -888,51 +947,57 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
  *       version of Ruby syntax (which you can trigger with `nil` or
  *       `"latest"`). You may also restrict the syntax to a specific version of
  *       Ruby, e.g., with `"3.3.0"`. To parse with the same syntax version that
- *       the current Ruby is running use `version: RUBY_VERSION`. Raises
- *       ArgumentError if the version is not currently supported by Prism.
+ *       the current Ruby is running, use `version: "current"`. To parse with the
+ *       nearest version to the current Ruby that is running, use
+ *       `version: "nearest"`. Raises ArgumentError if the version is not
+ *       currently supported by Prism.
  */
 static VALUE
 parse(int argc, VALUE *argv, VALUE self) {
-    pm_string_t input;
-    pm_options_t options = { 0 };
-    string_options(argc, argv, &input, &options);
+    pm_options_t *options = pm_options_new();
+    VALUE string = string_options(argc, argv, options);
+
+    const uint8_t *source = (const uint8_t *) RSTRING_PTR(string);
+    size_t length = RSTRING_LEN(string);
 
 #ifdef PRISM_BUILD_DEBUG
-    size_t length = pm_string_length(&input);
     char* dup = xmalloc(length);
-    memcpy(dup, pm_string_source(&input), length);
-    pm_string_constant_init(&input, dup, length);
+    memcpy(dup, source, length);
+    source = (const uint8_t *) dup;
 #endif
 
-    VALUE value = parse_input(&input, &options);
+    VALUE value = parse_input(source, length, options);
 
 #ifdef PRISM_BUILD_DEBUG
+#ifdef xfree_sized
+    xfree_sized(dup, length);
+#else
     xfree(dup);
 #endif
+#endif
 
-    pm_string_free(&input);
-    pm_options_free(&options);
+    pm_options_free(options);
     return value;
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::parse_file(filepath, **options) -> ParseResult
+ *   parse_file(filepath, **options) -> ParseResult
  *
  * Parse the given file and return a ParseResult instance. For supported
- * options, see Prism::parse.
+ * options, see Prism.parse.
  */
 static VALUE
 parse_file(int argc, VALUE *argv, VALUE self) {
-    pm_string_t input;
-    pm_options_t options = { 0 };
+    pm_options_t *options = pm_options_new();
 
     VALUE encoded_filepath;
-    file_options(argc, argv, &input, &options, &encoded_filepath);
+    pm_source_t *src = file_options(argc, argv, options, &encoded_filepath);
 
-    VALUE value = parse_input(&input, &options);
-    pm_string_free(&input);
-    pm_options_free(&options);
+    VALUE value = parse_input(pm_source_source(src), pm_source_length(src), options);
+    pm_source_free(src);
+    pm_options_free(options);
 
     return value;
 }
@@ -941,59 +1006,66 @@ parse_file(int argc, VALUE *argv, VALUE self) {
  * Parse the given input and return nothing.
  */
 static void
-profile_input(pm_string_t *input, const pm_options_t *options) {
-    pm_parser_t parser;
-    pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
+profile_input(const uint8_t *input, size_t input_length, const pm_options_t *options) {
+    pm_arena_t *arena = pm_arena_new();
+    pm_parser_t *parser = pm_parser_new(arena, input, input_length, options);
 
-    pm_node_t *node = pm_parse(&parser);
-    pm_node_destroy(&parser, node);
-    pm_parser_free(&parser);
+    pm_parse(parser);
+    pm_parser_free(parser);
+    pm_arena_free(arena);
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::profile(source, **options) -> nil
+ *   profile(source, **options) -> nil
  *
  * Parse the given string and return nothing. This method is meant to allow
  * profilers to avoid the overhead of reifying the AST to Ruby. For supported
- * options, see Prism::parse.
+ * options, see Prism.parse.
  */
 static VALUE
 profile(int argc, VALUE *argv, VALUE self) {
-    pm_string_t input;
-    pm_options_t options = { 0 };
+    pm_options_t *options = pm_options_new();
+    VALUE string = string_options(argc, argv, options);
 
-    string_options(argc, argv, &input, &options);
-    profile_input(&input, &options);
-    pm_string_free(&input);
-    pm_options_free(&options);
+    profile_input((const uint8_t *) RSTRING_PTR(string), RSTRING_LEN(string), options);
+    pm_options_free(options);
 
     return Qnil;
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::profile_file(filepath, **options) -> nil
+ *   profile_file(filepath, **options) -> nil
  *
  * Parse the given file and return nothing. This method is meant to allow
  * profilers to avoid the overhead of reifying the AST to Ruby. For supported
- * options, see Prism::parse.
+ * options, see Prism.parse.
  */
 static VALUE
 profile_file(int argc, VALUE *argv, VALUE self) {
-    pm_string_t input;
-    pm_options_t options = { 0 };
+    pm_options_t *options = pm_options_new();
 
     VALUE encoded_filepath;
-    file_options(argc, argv, &input, &options, &encoded_filepath);
+    pm_source_t *src = file_options(argc, argv, options, &encoded_filepath);
 
-    profile_input(&input, &options);
-    pm_string_free(&input);
-    pm_options_free(&options);
+    profile_input(pm_source_source(src), pm_source_length(src), options);
+    pm_source_free(src);
+    pm_options_free(options);
 
     return Qnil;
 }
 
+/* Stream EOF callback: returns 1 if the object's `eof?` is truthy, else 0. */
+static int
+parse_stream_eof(void *stream) {
+    VALUE at_eof = rb_funcall((VALUE) stream, rb_intern("eof?"), 0);
+    /* Qnil is a non-zero VALUE, so a bare C truth test would treat nil as EOF. */
+    return RTEST(at_eof) ? 1 : 0;
+}
+
 /**
  * An implementation of fgets that is suitable for use with Ruby IO objects.
  */
@@ -1016,11 +1088,12 @@ parse_stream_fgets(char *string, int size, void *stream) {
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::parse_stream(stream, **options) -> ParseResult
+ *   parse_stream(stream, **options) -> ParseResult
  *
  * Parse the given object that responds to `gets` and return a ParseResult
- * instance. The options that are supported are the same as Prism::parse.
+ * instance. The options that are supported are the same as Prism.parse.
  */
 static VALUE
 parse_stream(int argc, VALUE *argv, VALUE self) {
@@ -1028,22 +1101,24 @@ parse_stream(int argc, VALUE *argv, VALUE self) {
     VALUE keywords;
     rb_scan_args(argc, argv, "1:", &stream, &keywords);
 
-    pm_options_t options = { 0 };
-    extract_options(&options, Qnil, keywords);
+    pm_options_t *options = pm_options_new();
+    extract_options(options, Qnil, keywords);
 
-    pm_parser_t parser;
-    pm_buffer_t buffer;
+    pm_source_t *src = pm_source_stream_new((void *) stream, parse_stream_fgets, parse_stream_eof);
+    pm_arena_t *arena = pm_arena_new();
+    pm_parser_t *parser;
 
-    pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, &options);
-    rb_encoding *encoding = rb_enc_find(parser.encoding->name);
+    pm_node_t *node = pm_parse_stream(&parser, arena, src, options);
+    rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser));
 
-    VALUE source = pm_source_new(&parser, encoding, options.freeze);
-    VALUE value = pm_ast_new(&parser, node, encoding, source, options.freeze);
-    VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, options.freeze);
+    VALUE source = pm_source_new(parser, encoding, pm_options_freeze(options));
+    VALUE value = pm_ast_new(parser, node, encoding, source, pm_options_freeze(options));
+    VALUE result = parse_result_create(rb_cPrismParseResult, parser, value, encoding, source, pm_options_freeze(options));
 
-    pm_node_destroy(&parser, node);
-    pm_buffer_free(&buffer);
-    pm_parser_free(&parser);
+    pm_source_free(src);
+    pm_parser_free(parser);
+    pm_arena_free(arena);
+    pm_options_free(options);
 
     return result;
 }
@@ -1052,116 +1127,114 @@ parse_stream(int argc, VALUE *argv, VALUE self) {
  * Parse the given input and return an array of Comment objects.
  */
 static VALUE
-parse_input_comments(pm_string_t *input, const pm_options_t *options) {
-    pm_parser_t parser;
-    pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
+parse_input_comments(const uint8_t *input, size_t input_length, const pm_options_t *options) {
+    pm_arena_t *arena = pm_arena_new();
+    pm_parser_t *parser = pm_parser_new(arena, input, input_length, options);
 
-    pm_node_t *node = pm_parse(&parser);
-    rb_encoding *encoding = rb_enc_find(parser.encoding->name);
+    pm_parse(parser);
+    rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser));
 
-    VALUE source = pm_source_new(&parser, encoding, options->freeze);
-    VALUE comments = parser_comments(&parser, source, options->freeze);
+    VALUE source = pm_source_new(parser, encoding, pm_options_freeze(options));
+    VALUE comments = parser_comments(parser, source, pm_options_freeze(options));
 
-    pm_node_destroy(&parser, node);
-    pm_parser_free(&parser);
+    pm_parser_free(parser);
+    pm_arena_free(arena);
 
     return comments;
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::parse_comments(source, **options) -> Array
+ *   parse_comments(source, **options) -> Array
  *
  * Parse the given string and return an array of Comment objects. For supported
- * options, see Prism::parse.
+ * options, see Prism.parse.
  */
 static VALUE
 parse_comments(int argc, VALUE *argv, VALUE self) {
-    pm_string_t input;
-    pm_options_t options = { 0 };
-    string_options(argc, argv, &input, &options);
+    pm_options_t *options = pm_options_new();
+    VALUE string = string_options(argc, argv, options);
 
-    VALUE result = parse_input_comments(&input, &options);
-    pm_string_free(&input);
-    pm_options_free(&options);
+    VALUE result = parse_input_comments((const uint8_t *) RSTRING_PTR(string), RSTRING_LEN(string), options);
+    pm_options_free(options);
 
     return result;
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::parse_file_comments(filepath, **options) -> Array
+ *   parse_file_comments(filepath, **options) -> Array
  *
  * Parse the given file and return an array of Comment objects. For supported
- * options, see Prism::parse.
+ * options, see Prism.parse.
  */
 static VALUE
 parse_file_comments(int argc, VALUE *argv, VALUE self) {
-    pm_string_t input;
-    pm_options_t options = { 0 };
+    pm_options_t *options = pm_options_new();
 
     VALUE encoded_filepath;
-    file_options(argc, argv, &input, &options, &encoded_filepath);
+    pm_source_t *src = file_options(argc, argv, options, &encoded_filepath);
 
-    VALUE value = parse_input_comments(&input, &options);
-    pm_string_free(&input);
-    pm_options_free(&options);
+    VALUE value = parse_input_comments(pm_source_source(src), pm_source_length(src), options);
+    pm_source_free(src);
+    pm_options_free(options);
 
     return value;
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::parse_lex(source, **options) -> ParseLexResult
+ *   parse_lex(source, **options) -> ParseLexResult
  *
  * Parse the given string and return a ParseLexResult instance that contains a
  * 2-element array, where the first element is the AST and the second element is
  * an array of Token instances.
  *
  * This API is only meant to be used in the case where you need both the AST and
- * the tokens. If you only need one or the other, use either Prism::parse or
- * Prism::lex.
+ * the tokens. If you only need one or the other, use either Prism.parse or
+ * Prism.lex.
  *
- * For supported options, see Prism::parse.
+ * For supported options, see Prism.parse.
  */
 static VALUE
 parse_lex(int argc, VALUE *argv, VALUE self) {
-    pm_string_t input;
-    pm_options_t options = { 0 };
-    string_options(argc, argv, &input, &options);
+    pm_options_t *options = pm_options_new();
+    VALUE string = string_options(argc, argv, options);
 
-    VALUE value = parse_lex_input(&input, &options, true);
-    pm_string_free(&input);
-    pm_options_free(&options);
+    VALUE value = parse_lex_input((const uint8_t *) RSTRING_PTR(string), RSTRING_LEN(string), options, true);
+    pm_options_free(options);
 
     return value;
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::parse_lex_file(filepath, **options) -> ParseLexResult
+ *   parse_lex_file(filepath, **options) -> ParseLexResult
  *
  * Parse the given file and return a ParseLexResult instance that contains a
  * 2-element array, where the first element is the AST and the second element is
  * an array of Token instances.
  *
  * This API is only meant to be used in the case where you need both the AST and
- * the tokens. If you only need one or the other, use either Prism::parse_file
- * or Prism::lex_file.
+ * the tokens. If you only need one or the other, use either Prism.parse_file
+ * or Prism.lex_file.
  *
- * For supported options, see Prism::parse.
+ * For supported options, see Prism.parse.
  */
 static VALUE
 parse_lex_file(int argc, VALUE *argv, VALUE self) {
-    pm_string_t input;
-    pm_options_t options = { 0 };
+    pm_options_t *options = pm_options_new();
 
     VALUE encoded_filepath;
-    file_options(argc, argv, &input, &options, &encoded_filepath);
+    pm_source_t *src = file_options(argc, argv, options, &encoded_filepath);
 
-    VALUE value = parse_lex_input(&input, &options, true);
-    pm_string_free(&input);
-    pm_options_free(&options);
+    VALUE value = parse_lex_input(pm_source_source(src), pm_source_length(src), options, true);
+    pm_source_free(src);
+    pm_options_free(options);
 
     return value;
 }
@@ -1170,45 +1243,45 @@ parse_lex_file(int argc, VALUE *argv, VALUE self) {
  * Parse the given input and return true if it parses without errors.
  */
 static VALUE
-parse_input_success_p(pm_string_t *input, const pm_options_t *options) {
-    pm_parser_t parser;
-    pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
+parse_input_success_p(const uint8_t *input, size_t input_length, const pm_options_t *options) {
+    pm_arena_t *arena = pm_arena_new();
+    pm_parser_t *parser = pm_parser_new(arena, input, input_length, options);
 
-    pm_node_t *node = pm_parse(&parser);
-    pm_node_destroy(&parser, node);
+    pm_parse(parser);
 
-    VALUE result = parser.error_list.size == 0 ? Qtrue : Qfalse;
-    pm_parser_free(&parser);
+    VALUE result = pm_parser_errors_size(parser) == 0 ? Qtrue : Qfalse;
+    pm_parser_free(parser);
+    pm_arena_free(arena);
 
     return result;
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::parse_success?(source, **options) -> bool
+ *   parse_success?(source, **options) -> bool
  *
  * Parse the given string and return true if it parses without errors. For
- * supported options, see Prism::parse.
+ * supported options, see Prism.parse.
  */
 static VALUE
 parse_success_p(int argc, VALUE *argv, VALUE self) {
-    pm_string_t input;
-    pm_options_t options = { 0 };
-    string_options(argc, argv, &input, &options);
+    pm_options_t *options = pm_options_new();
+    VALUE string = string_options(argc, argv, options);
 
-    VALUE result = parse_input_success_p(&input, &options);
-    pm_string_free(&input);
-    pm_options_free(&options);
+    VALUE result = parse_input_success_p((const uint8_t *) RSTRING_PTR(string), RSTRING_LEN(string), options);
+    pm_options_free(options);
 
     return result;
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::parse_failure?(source, **options) -> bool
+ *   parse_failure?(source, **options) -> bool
  *
  * Parse the given string and return true if it parses with errors. For
- * supported options, see Prism::parse.
+ * supported options, see Prism.parse.
  */
 static VALUE
 parse_failure_p(int argc, VALUE *argv, VALUE self) {
@@ -1216,33 +1289,34 @@ parse_failure_p(int argc, VALUE *argv, VALUE self) {
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::parse_file_success?(filepath, **options) -> bool
+ *   parse_file_success?(filepath, **options) -> bool
  *
  * Parse the given file and return true if it parses without errors. For
- * supported options, see Prism::parse.
+ * supported options, see Prism.parse.
  */
 static VALUE
 parse_file_success_p(int argc, VALUE *argv, VALUE self) {
-    pm_string_t input;
-    pm_options_t options = { 0 };
+    pm_options_t *options = pm_options_new();
 
     VALUE encoded_filepath;
-    file_options(argc, argv, &input, &options, &encoded_filepath);
+    pm_source_t *src = file_options(argc, argv, options, &encoded_filepath);
 
-    VALUE result = parse_input_success_p(&input, &options);
-    pm_string_free(&input);
-    pm_options_free(&options);
+    VALUE result = parse_input_success_p(pm_source_source(src), pm_source_length(src), options);
+    pm_source_free(src);
+    pm_options_free(options);
 
     return result;
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::parse_file_failure?(filepath, **options) -> bool
+ *   parse_file_failure?(filepath, **options) -> bool
  *
  * Parse the given file and return true if it parses with errors. For
- * supported options, see Prism::parse.
+ * supported options, see Prism.parse.
  */
 static VALUE
 parse_file_failure_p(int argc, VALUE *argv, VALUE self) {
@@ -1272,8 +1346,9 @@ string_query(pm_string_query_t result) {
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::StringQuery::local?(string) -> bool
+ *   local?(string) -> bool
  *
  * Returns true if the string constitutes a valid local variable name. Note that
  * this means the names that can be set through Binding#local_variable_set, not
@@ -1286,8 +1361,9 @@ string_query_local_p(VALUE self, VALUE string) {
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::StringQuery::constant?(string) -> bool
+ *   constant?(string) -> bool
  *
  * Returns true if the string constitutes a valid constant name. Note that this
  * means the names that can be set through Module#const_set, not necessarily the
@@ -1300,8 +1376,9 @@ string_query_constant_p(VALUE self, VALUE string) {
 }
 
 /**
+ * :markup: markdown
  * call-seq:
- *   Prism::StringQuery::method_name?(string) -> bool
+ *   method_name?(string) -> bool
  *
  * Returns true if the string constitutes a valid method name.
  */
@@ -1356,6 +1433,8 @@ Init_prism(void) {
     rb_cPrismStringQuery = rb_define_class_under(rb_cPrism, "StringQuery", rb_cObject);
     rb_cPrismScope = rb_define_class_under(rb_cPrism, "Scope", rb_cObject);
 
+    rb_cPrismCurrentVersionError = rb_const_get(rb_cPrism, rb_intern("CurrentVersionError"));
+
     // Intern all of the IDs eagerly that we support so that we don't have to do
     // it every time we parse.
     rb_id_option_command_line = rb_intern_const("command_line");
@@ -1407,5 +1486,4 @@ Init_prism(void) {
 
     // Next, initialize the other APIs.
     Init_prism_api_node();
-    Init_prism_pack();
 }
diff --git a/prism/extension.h b/prism/extension.h
index 506da2fd6f..d0cbc2ff53 100644
--- a/prism/extension.h
+++ b/prism/extension.h
@@ -1,10 +1,11 @@
 #ifndef PRISM_EXT_NODE_H
 #define PRISM_EXT_NODE_H
 
-#define EXPECTED_PRISM_VERSION "1.4.0"
+#define EXPECTED_PRISM_VERSION "1.9.0"
 
 #include <ruby.h>
 #include <ruby/encoding.h>
+#include <ruby/version.h>
 #include "prism.h"
 
 VALUE pm_source_new(const pm_parser_t *parser, rb_encoding *encoding, bool freeze);
@@ -13,7 +14,6 @@ VALUE pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *
 VALUE pm_integer_new(const pm_integer_t *integer);
 
 void Init_prism_api_node(void);
-void Init_prism_pack(void);
 RUBY_FUNC_EXPORTED void Init_prism(void);
 
 #endif
diff --git a/prism/util/pm_integer.c b/prism/integer.c
index 4170ecc58d..1b69dbdceb 100644
--- a/prism/util/pm_integer.c
+++ b/prism/integer.c
@@ -1,4 +1,25 @@
-#include "prism/util/pm_integer.h"
+#include "prism/internal/integer.h"
+
+#include "prism/internal/allocator.h"
+#include "prism/internal/buffer.h"
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * Free the internal memory of an integer. This memory will only be allocated if
+ * the integer exceeds the size of a single uint32_t.
+ */
+static void
+pm_integer_free(pm_integer_t *integer) {
+    if (integer->values) {
+        xfree(integer->values);
+    }
+}
 
 /**
  * Pull out the length and values from the integer, regardless of the form in
@@ -374,7 +395,7 @@ pm_integer_convert_base(pm_integer_t *destination, const pm_integer_t *source, u
             }
         }
 
-        xfree(bigints);
+        xfree_sized(bigints, bigints_length * sizeof(pm_integer_t));
         bigints = next_bigints;
         bigints_length = next_length;
     }
@@ -383,7 +404,7 @@ pm_integer_convert_base(pm_integer_t *destination, const pm_integer_t *source, u
     destination->negative = source->negative;
     pm_integer_normalize(destination);
 
-    xfree(bigints);
+    xfree_sized(bigints, bigints_length * sizeof(pm_integer_t));
     pm_integer_free(&base);
 }
 
@@ -422,7 +443,7 @@ pm_integer_parse_powof2(pm_integer_t *integer, uint32_t base, const uint8_t *dig
 static void
 pm_integer_parse_decimal(pm_integer_t *integer, const uint8_t *digits, size_t digits_length) {
     const size_t batch = 9;
-    size_t length = (digits_length + batch - 1) / batch;
+    const size_t length = (digits_length + batch - 1) / batch;
 
     uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
     uint32_t value = 0;
@@ -439,7 +460,7 @@ pm_integer_parse_decimal(pm_integer_t *integer, const uint8_t *digits, size_t di
 
     // Convert base from 10**9 to 1<<32.
     pm_integer_convert_base(integer, &((pm_integer_t) { .length = length, .values = values,  .value = 0, .negative = false }), 1000000000, ((uint64_t) 1 << 32));
-    xfree(values);
+    xfree_sized(values, length * sizeof(uint32_t));
 }
 
 /**
@@ -448,7 +469,8 @@ pm_integer_parse_decimal(pm_integer_t *integer, const uint8_t *digits, size_t di
 static void
 pm_integer_parse_big(pm_integer_t *integer, uint32_t multiplier, const uint8_t *start, const uint8_t *end) {
     // Allocate an array to store digits.
-    uint8_t *digits = xmalloc(sizeof(uint8_t) * (size_t) (end - start));
+    const size_t digits_capa = sizeof(uint8_t) * (size_t) (end - start);
+    uint8_t *digits = xmalloc(digits_capa);
     size_t digits_length = 0;
 
     for (; start < end; start++) {
@@ -463,7 +485,7 @@ pm_integer_parse_big(pm_integer_t *integer, uint32_t multiplier, const uint8_t *
         pm_integer_parse_powof2(integer, multiplier, digits, digits_length);
     }
 
-    xfree(digits);
+    xfree_sized(digits, digits_capa);
 }
 
 /**
@@ -603,7 +625,7 @@ void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator) {
 /**
  * Convert an integer to a decimal string.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer) {
     if (integer->negative) {
         pm_buffer_append_byte(buffer, '-');
@@ -635,7 +657,7 @@ pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer) {
     }
 
     // Allocate a buffer that we'll copy the decimal digits into.
-    size_t digits_length = converted.length * 9;
+    const size_t digits_length = converted.length * 9;
     char *digits = xcalloc(digits_length, sizeof(char));
     if (digits == NULL) return;
 
@@ -654,17 +676,6 @@ pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer) {
 
     // Finally, append the string to the buffer and free the digits.
     pm_buffer_append_string(buffer, digits + start_offset, digits_length - start_offset);
-    xfree(digits);
+    xfree_sized(digits, sizeof(char) * digits_length);
     pm_integer_free(&converted);
 }
-
-/**
- * Free the internal memory of an integer. This memory will only be allocated if
- * the integer exceeds the size of a single uint32_t.
- */
-PRISM_EXPORTED_FUNCTION void
-pm_integer_free(pm_integer_t *integer) {
-    if (integer->values) {
-        xfree(integer->values);
-    }
-}
diff --git a/prism/integer.h b/prism/integer.h
new file mode 100644
index 0000000000..9285986885
--- /dev/null
+++ b/prism/integer.h
@@ -0,0 +1,41 @@
+/**
+ * @file integer.h
+ *
+ * This module provides functions for working with arbitrary-sized integers.
+ */
+#ifndef PRISM_INTEGER_H
+#define PRISM_INTEGER_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * A structure that represents an arbitrary-sized integer.
+ */
+typedef struct {
+    /**
+     * The number of allocated values. length is set to 0 if the integer fits
+     * into uint32_t.
+     */
+    size_t length;
+
+    /**
+     * List of 32-bit integers. Set to NULL if the integer fits into uint32_t.
+     */
+    uint32_t *values;
+
+    /**
+     * Embedded value for small integer. This value is set to 0 if the value
+     * does not fit into uint32_t.
+     */
+    uint32_t value;
+
+    /**
+     * Whether or not the integer is negative. It is stored this way so that a
+     * zeroed pm_integer_t is always positive zero.
+     */
+    bool negative;
+} pm_integer_t;
+
+#endif
diff --git a/prism/internal/allocator.h b/prism/internal/allocator.h
new file mode 100644
index 0000000000..6c54010dbf
--- /dev/null
+++ b/prism/internal/allocator.h
@@ -0,0 +1,68 @@
+#ifndef PRISM_INTERNAL_ALLOCATOR_H
+#define PRISM_INTERNAL_ALLOCATOR_H
+
+/* If you build Prism with a custom allocator, configure it with
+ * "-D PRISM_XALLOCATOR" to use your own allocator that defines xmalloc,
+ * xrealloc, xcalloc, and xfree.
+ *
+ * For example, your `prism_xallocator.h` file could look like this:
+ *
+ * ```
+ * #ifndef PRISM_XALLOCATOR_H
+ * #define PRISM_XALLOCATOR_H
+ * #define xmalloc          my_malloc
+ * #define xrealloc         my_realloc
+ * #define xcalloc          my_calloc
+ * #define xfree            my_free
+ * #define xrealloc_sized   my_realloc_sized // (optional)
+ * #define xfree_sized      my_free_sized    // (optional)
+ * #endif
+ * ```
+ */
+#ifdef PRISM_XALLOCATOR
+    #include "prism_xallocator.h"
+#else
+    #ifndef xmalloc
+        /* The malloc function that should be used. This can be overridden with
+         * the PRISM_XALLOCATOR define. */
+        #define xmalloc malloc
+    #endif
+
+    #ifndef xrealloc
+        /* The realloc function that should be used. This can be overridden with
+         * the PRISM_XALLOCATOR define. */
+        #define xrealloc realloc
+    #endif
+
+    #ifndef xcalloc
+        /* The calloc function that should be used. This can be overridden with
+         * the PRISM_XALLOCATOR define. */
+        #define xcalloc calloc
+    #endif
+
+    #ifndef xfree
+        /* The free function that should be used. This can be overridden with
+         * the PRISM_XALLOCATOR define. */
+        #define xfree free
+    #endif
+#endif
+
+#ifndef xfree_sized
+    /* The free_sized function that should be used. This can be overridden with
+     * the PRISM_XALLOCATOR define. If not defined, defaults to calling xfree.
+     */
+    #define xfree_sized(p, s) xfree(((void)(s), (p)))
+#endif
+
+#ifndef xrealloc_sized
+    /* The xrealloc_sized function that should be used. This can be overridden
+     * with the PRISM_XALLOCATOR define. If not defined, defaults to calling
+     * xrealloc. */
+    #define xrealloc_sized(p, ns, os) xrealloc((p), ((void)(os), (ns)))
+#endif
+
+#ifdef PRISM_BUILD_DEBUG
+    #include "prism/internal/allocator_debug.h"
+#endif
+
+#endif
diff --git a/prism/internal/allocator_debug.h b/prism/internal/allocator_debug.h
new file mode 100644
index 0000000000..846e96ba2d
--- /dev/null
+++ b/prism/internal/allocator_debug.h
@@ -0,0 +1,88 @@
+#ifndef PRISM_INTERNAL_ALLOCATOR_DEBUG_H
+#define PRISM_INTERNAL_ALLOCATOR_DEBUG_H
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static inline void *
+pm_allocator_debug_malloc(size_t size) {
+    size_t *memory = xmalloc(size + sizeof(size_t)); /* one extra leading word records the requested size */
+    if (memory != NULL) *memory++ = size; /* on failure, fall through and return NULL instead of writing through it */
+    return memory;
+}
+
+static inline void *
+pm_allocator_debug_calloc(size_t nmemb, size_t size) {
+    if (size != 0 && nmemb > ((size_t) -1 - sizeof(size_t)) / size) return NULL; /* nmemb * size plus the size header would wrap */
+    void *ptr = pm_allocator_debug_malloc(nmemb * size);
+    if (ptr != NULL) memset(ptr, 0, nmemb * size); /* calloc contract: the returned bytes are zero-filled */
+    return ptr;
+}
+
+static inline void *
+pm_allocator_debug_realloc(void *ptr, size_t size) {
+    if (ptr == NULL) {
+        return pm_allocator_debug_malloc(size);
+    }
+
+    size_t *memory = (size_t *)ptr - 1; /* the underlying allocation starts at the size header */
+    memory = (size_t *)xrealloc(memory, size + sizeof(size_t));
+    if (memory == NULL) return NULL; /* report failure instead of writing the header through NULL */
+    memory[0] = size;
+    return memory + 1;
+}
+
+static inline void
+pm_allocator_debug_free(void *ptr) {
+    if (ptr != NULL) {
+        size_t *memory = (size_t *)ptr;
+        xfree(memory - 1); /* step back over the size header that pm_allocator_debug_malloc placed in front */
+    }
+}
+
+static inline void
+pm_allocator_debug_free_sized(void *ptr, size_t old_size) {
+    if (ptr != NULL) {
+        size_t *memory = (size_t *)ptr; /* memory[-1] is the size recorded when the block was allocated */
+        if (old_size != memory[-1]) {
+            fprintf(stderr, "[BUG] buffer %p was allocated with size %zu but freed with size %zu\n", ptr, memory[-1], old_size);
+            abort();
+        }
+        xfree_sized(memory - 1, old_size + sizeof(size_t)); /* the real allocation includes the header word */
+    }
+}
+
+static inline void *
+pm_allocator_debug_realloc_sized(void *ptr, size_t size, size_t old_size) {
+    if (ptr == NULL) {
+        if (old_size != 0) {
+            fprintf(stderr, "[BUG] realloc_sized called with NULL pointer and old size %zu\n", old_size);
+            abort();
+        }
+        return pm_allocator_debug_malloc(size);
+    }
+
+    size_t *memory = (size_t *)ptr; /* memory[-1] is the size recorded when the block was allocated */
+    if (old_size != memory[-1]) {
+        fprintf(stderr, "[BUG] buffer %p was allocated with size %zu but realloced with size %zu\n", ptr, memory[-1], old_size);
+        abort();
+    }
+    return pm_allocator_debug_realloc(ptr, size);
+}
+
+#undef xmalloc
+#undef xrealloc
+#undef xcalloc
+#undef xfree
+#undef xrealloc_sized
+#undef xfree_sized
+
+#define xmalloc          pm_allocator_debug_malloc
+#define xrealloc         pm_allocator_debug_realloc
+#define xcalloc          pm_allocator_debug_calloc
+#define xfree            pm_allocator_debug_free
+#define xrealloc_sized   pm_allocator_debug_realloc_sized
+#define xfree_sized      pm_allocator_debug_free_sized
+
+#endif
diff --git a/prism/internal/arena.h b/prism/internal/arena.h
new file mode 100644
index 0000000000..2e413b42bf
--- /dev/null
+++ b/prism/internal/arena.h
@@ -0,0 +1,108 @@
+#ifndef PRISM_INTERNAL_ARENA_H
+#define PRISM_INTERNAL_ARENA_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/flex_array.h"
+#include "prism/compiler/force_inline.h"
+#include "prism/compiler/inline.h"
+
+#include "prism/arena.h"
+
+#include <stddef.h>
+#include <string.h>
+
+/*
+ * A single block of memory in the arena. Blocks are linked via prev pointers so
+ * they can be freed by walking the chain.
+ */
+typedef struct pm_arena_block {
+    /* The previous block in the chain (for freeing). */
+    struct pm_arena_block *prev;
+
+    /* The total usable bytes in data[]. */
+    size_t capacity;
+
+    /* The number of bytes consumed so far. */
+    size_t used;
+
+    /* The block's data. */
+    char data[PM_FLEX_ARRAY_LENGTH];
+} pm_arena_block_t;
+
+/*
+ * A bump allocator. Allocations are made by bumping a pointer within the
+ * current block. When a block is full, a new block is allocated and linked to
+ * the previous one. All blocks are freed at once by walking the chain.
+ */
+struct pm_arena_t {
+    /* The active block (allocate from here). */
+    pm_arena_block_t *current;
+
+    /* The number of blocks allocated. */
+    size_t block_count;
+};
+
+/*
+ * Free all blocks in the arena. After this call, all pointers returned by
+ * pm_arena_alloc and pm_arena_zalloc are invalid.
+ */
+void pm_arena_cleanup(pm_arena_t *arena);
+
+/*
+ * Ensure the arena has at least `capacity` bytes available in its current
+ * block, allocating a new block if necessary. This allows callers to
+ * pre-size the arena to avoid repeated small block allocations.
+ */
+void pm_arena_reserve(pm_arena_t *arena, size_t capacity);
+
+/*
+ * Slow path for pm_arena_alloc: allocate a new block and return a pointer to
+ * the first `size` bytes. Do not call directly — use pm_arena_alloc instead.
+ */
+void * pm_arena_alloc_slow(pm_arena_t *arena, size_t size);
+
+/*
+ * Allocate `size` bytes from the arena; the memory is NOT zeroed. This function
+ * is infallible — it aborts on allocation failure. `alignment` must be a power
+ * of two, since the offset is rounded up with a bit mask. The inlined fast path
+ * bumps the current block (its two `>=` tests reject wrapped arithmetic); when
+ * that does not fit, the out-of-line slow path allocates a new block.
+ */
+static PRISM_FORCE_INLINE void *
+pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment) {
+    if (arena->current != NULL) {
+        size_t used_aligned = (arena->current->used + alignment - 1) & ~(alignment - 1);
+        size_t needed = used_aligned + size;
+
+        if (used_aligned >= arena->current->used && needed >= used_aligned && needed <= arena->current->capacity) {
+            arena->current->used = needed;
+            return arena->current->data + used_aligned;
+        }
+    }
+
+    return pm_arena_alloc_slow(arena, size);
+}
+
+/*
+ * Allocate zero-initialized memory from the arena. This function is infallible
+ * — it aborts on allocation failure.
+ */
+static PRISM_INLINE void *
+pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment) {
+    void *ptr = pm_arena_alloc(arena, size, alignment);
+    memset(ptr, 0, size);
+    return ptr;
+}
+
+/*
+ * Allocate memory from the arena and copy the given data into it. This is a
+ * convenience wrapper around pm_arena_alloc + memcpy.
+ */
+static PRISM_INLINE void *
+pm_arena_memdup(pm_arena_t *arena, const void *src, size_t size, size_t alignment) {
+    void *dst = pm_arena_alloc(arena, size, alignment);
+    memcpy(dst, src, size);
+    return dst;
+}
+
+#endif
diff --git a/prism/internal/bit.h b/prism/internal/bit.h
new file mode 100644
index 0000000000..b0111a4c2c
--- /dev/null
+++ b/prism/internal/bit.h
@@ -0,0 +1,42 @@
+#ifndef PRISM_INTERNAL_BIT_H
+#define PRISM_INTERNAL_BIT_H
+
+#include "prism/compiler/inline.h"
+
+/*
+ * Count trailing zero bits in a 64-bit value. Used by SWAR identifier scanning
+ * to find the first non-matching byte in a word.
+ *
+ * Precondition: v must be nonzero. The result is undefined when v == 0
+ * (matching the behavior of __builtin_ctzll and _BitScanForward64).
+ */
+#if defined(__GNUC__) || defined(__clang__)
+#define pm_ctzll(v) ((unsigned) __builtin_ctzll(v))
+#elif defined(_MSC_VER)
+#include <intrin.h>
+#include <stdint.h>
+
+static PRISM_INLINE unsigned
+pm_ctzll(uint64_t v) {
+    unsigned long index;
+    _BitScanForward64(&index, v);
+    return (unsigned) index;
+}
+#else
+#include <stdint.h>
+
+static PRISM_INLINE unsigned
+pm_ctzll(uint64_t v) {
+    unsigned c = 0;
+    v &= ~v + 1; /* isolate the lowest set bit; unsigned negation avoids signed overflow when only bit 63 is set */
+    if (v & 0x00000000FFFFFFFFULL) c += 0;  else c += 32;
+    if (v & 0x0000FFFF0000FFFFULL) c += 0;  else c += 16;
+    if (v & 0x00FF00FF00FF00FFULL) c += 0;  else c += 8;
+    if (v & 0x0F0F0F0F0F0F0F0FULL) c += 0;  else c += 4;
+    if (v & 0x3333333333333333ULL) c += 0;  else c += 2;
+    if (v & 0x5555555555555555ULL) c += 0;  else c += 1;
+    return c;
+}
+#endif
+
+#endif
diff --git a/prism/internal/buffer.h b/prism/internal/buffer.h
new file mode 100644
index 0000000000..a849bbf8e6
--- /dev/null
+++ b/prism/internal/buffer.h
@@ -0,0 +1,91 @@
+#ifndef PRISM_INTERNAL_BUFFER_H
+#define PRISM_INTERNAL_BUFFER_H
+
+#include "prism/compiler/format.h"
+
+#include "prism/buffer.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+
+/*
+ * A simple memory buffer that stores data in a contiguous block of memory.
+ */
+struct pm_buffer_t {
+    /* The length of the buffer in bytes. */
+    size_t length;
+
+    /* The capacity of the buffer in bytes that has been allocated. */
+    size_t capacity;
+
+    /* A pointer to the start of the buffer. */
+    char *value;
+};
+
+/* Initialize a pm_buffer_t with the given capacity. */
+void pm_buffer_init(pm_buffer_t *buffer, size_t capacity);
+
+/* Free the memory held by the buffer. */
+void pm_buffer_cleanup(pm_buffer_t *buffer);
+
+/* Append the given amount of space as zeroes to the buffer. */
+void pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length);
+
+/* Append a formatted string to the buffer. */
+void pm_buffer_append_format(pm_buffer_t *buffer, const char *format, ...) PRISM_ATTRIBUTE_FORMAT(2, 3);
+
+/* Append a string to the buffer. */
+void pm_buffer_append_string(pm_buffer_t *buffer, const char *value, size_t length);
+
+/* Append a list of bytes to the buffer. */
+void pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length);
+
+/* Append a single byte to the buffer. */
+void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value);
+
+/* Append a 32-bit unsigned integer to the buffer as a variable-length integer. */
+void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value);
+
+/* Append a 32-bit signed integer to the buffer as a variable-length integer. */
+void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value);
+
+/* Append a double to the buffer. */
+void pm_buffer_append_double(pm_buffer_t *buffer, double value);
+
+/* Append a unicode codepoint to the buffer. */
+bool pm_buffer_append_unicode_codepoint(pm_buffer_t *buffer, uint32_t value);
+
+/*
+ * The different types of escaping that can be performed by the buffer when
+ * appending a slice of Ruby source code.
+ */
+typedef enum {
+    PM_BUFFER_ESCAPING_RUBY,
+    PM_BUFFER_ESCAPING_JSON
+} pm_buffer_escaping_t;
+
+/* Append a slice of source code to the buffer. */
+void pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping);
+
+/* Prepend the given string to the buffer. */
+void pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length);
+
+/* Concatenate one buffer onto another. */
+void pm_buffer_concat(pm_buffer_t *destination, const pm_buffer_t *source);
+
+/*
+ * Clear the buffer by reducing its size to 0. This does not free the allocated
+ * memory, but it does allow the buffer to be reused.
+ */
+void pm_buffer_clear(pm_buffer_t *buffer);
+
+/* Strip the whitespace from the end of the buffer. */
+void pm_buffer_rstrip(pm_buffer_t *buffer);
+
+/* Returns the index of the given value in the buffer (see the definition for the not-found result). */
+size_t pm_buffer_index(const pm_buffer_t *buffer, char value);
+
+/* Insert the given string into the buffer at the given index. */
+void pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length);
+
+#endif
diff --git a/prism/internal/char.h b/prism/internal/char.h
new file mode 100644
index 0000000000..9a58fba8c5
--- /dev/null
+++ b/prism/internal/char.h
@@ -0,0 +1,139 @@
+#ifndef PRISM_INTERNAL_CHAR_H
+#define PRISM_INTERNAL_CHAR_H
+
+#include "prism/compiler/force_inline.h"
+
+#include "prism/arena.h"
+#include "prism/line_offset_list.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* Bit flag for whitespace characters in pm_byte_table. */
+#define PRISM_CHAR_BIT_WHITESPACE (1 << 0)
+
+/* Bit flag for inline whitespace characters in pm_byte_table. */
+#define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1)
+
+/*
+ * A lookup table for classifying bytes. Each entry is a bitfield of
+ * PRISM_CHAR_BIT_* flags. Defined in char.c.
+ */
+extern const uint8_t pm_byte_table[256];
+
+/* Returns true if the given character is a whitespace character. */
+static PRISM_FORCE_INLINE bool
+pm_char_is_whitespace(const uint8_t b) {
+    return (pm_byte_table[b] & PRISM_CHAR_BIT_WHITESPACE) != 0;
+}
+
+/* Returns true if the given character is an inline whitespace character. */
+static PRISM_FORCE_INLINE bool
+pm_char_is_inline_whitespace(const uint8_t b) {
+    return (pm_byte_table[b] & PRISM_CHAR_BIT_INLINE_WHITESPACE) != 0;
+}
+
+/*
+ * Returns the number of characters at the start of the string that are inline
+ * whitespace (space/tab). Scans the byte table directly for use in hot paths.
+ */
+static PRISM_FORCE_INLINE size_t
+pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
+    if (length <= 0) return 0;
+    size_t size = 0;
+    size_t maximum = (size_t) length;
+    while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_INLINE_WHITESPACE)) size++;
+    return size;
+}
+
+/*
+ * Returns the number of characters at the start of the string that are
+ * whitespace. Disallows searching past the given maximum number of characters.
+ */
+size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
+
+/*
+ * Returns the number of characters at the start of the string that are
+ * whitespace while also tracking the location of each newline. Disallows
+ * searching past the given maximum number of characters.
+ */
+size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_arena_t *arena, pm_line_offset_list_t *line_offsets, uint32_t start_offset);
+
+/*
+ * Returns the number of characters at the start of the string that are decimal
+ * digits. Disallows searching past the given maximum number of characters.
+ */
+size_t pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length);
+
+/*
+ * Returns the number of characters at the start of the string that are
+ * hexadecimal digits. Disallows searching past the given maximum number of
+ * characters.
+ */
+size_t pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
+
+/*
+ * Returns the number of characters at the start of the string that are octal
+ * digits or underscores. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to the index
+ * of the first invalid underscore.
+ */
+size_t pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+/*
+ * Returns the number of characters at the start of the string that are decimal
+ * digits or underscores. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to the index
+ * of the first invalid underscore.
+ */
+size_t pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+/*
+ * Returns the number of characters at the start of the string that are
+ * hexadecimal digits or underscores. Disallows searching past the given maximum
+ * number of characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to the index
+ * of the first invalid underscore.
+ */
+size_t pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+/*
+ * Returns the number of characters at the start of the string that are regexp
+ * options. Disallows searching past the given maximum number of characters.
+ */
+size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
+
+/*
+ * Returns the number of characters at the start of the string that are binary
+ * digits or underscores. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to the index
+ * of the first invalid underscore.
+ */
+size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+
+/* Returns true if the given character is a binary digit. */
+bool pm_char_is_binary_digit(const uint8_t b);
+
+/* Returns true if the given character is an octal digit. */
+bool pm_char_is_octal_digit(const uint8_t b);
+
+/* Returns true if the given character is a decimal digit. */
+bool pm_char_is_decimal_digit(const uint8_t b);
+
+/* Returns true if the given character is a hexadecimal digit. */
+bool pm_char_is_hexadecimal_digit(const uint8_t b);
+
+#endif
diff --git a/prism/internal/comments.h b/prism/internal/comments.h
new file mode 100644
index 0000000000..bb3039a658
--- /dev/null
+++ b/prism/internal/comments.h
@@ -0,0 +1,20 @@
+#ifndef PRISM_INTERNAL_COMMENTS_H
+#define PRISM_INTERNAL_COMMENTS_H
+
+#include "prism/comments.h"
+
+#include "prism/internal/list.h"
+
+/* A comment found while parsing. */
+struct pm_comment_t {
+    /* The embedded base node. */
+    pm_list_node_t node;
+
+    /* The location of the comment in the source. */
+    pm_location_t location;
+
+    /* The type of the comment. */
+    pm_comment_type_t type;
+};
+
+#endif
diff --git a/prism/internal/constant_pool.h b/prism/internal/constant_pool.h
new file mode 100644
index 0000000000..fa2be783f5
--- /dev/null
+++ b/prism/internal/constant_pool.h
@@ -0,0 +1,117 @@
+#ifndef PRISM_INTERNAL_CONSTANT_POOL_H
+#define PRISM_INTERNAL_CONSTANT_POOL_H
+
+#include "prism/constant_pool.h"
+
+#include "prism/arena.h"
+
+#include <stdbool.h>
+
+/* A constant in the pool which effectively stores a string. */
+struct pm_constant_t {
+    /* A pointer to the start of the string. */
+    const uint8_t *start;
+
+    /* The length of the string. */
+    size_t length;
+};
+
+/*
+ * The type of bucket in the constant pool hash map. This determines how the
+ * bucket should be freed.
+ */
+typedef unsigned int pm_constant_pool_bucket_type_t;
+
+/* By default, each constant is a slice of the source. */
+static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_DEFAULT = 0;
+
+/* An owned constant is one for which memory has been allocated. */
+static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_OWNED = 1;
+
+/* A constant constant is known at compile time. */
+static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_CONSTANT = 2;
+
+/* A bucket in the hash map. */
+typedef struct {
+    /* The incremental ID used for indexing back into the pool. */
+    unsigned int id: 30;
+
+    /* The type of the bucket, which determines how to free it. */
+    pm_constant_pool_bucket_type_t type: 2;
+
+    /* The hash of the bucket. */
+    uint32_t hash;
+
+    /*
+     * A pointer to the start of the string, stored directly in the bucket to
+     * avoid a pointer chase to the constants array during probing.
+     */
+    const uint8_t *start;
+
+    /* The length of the string. */
+    size_t length;
+} pm_constant_pool_bucket_t;
+
+/* The overall constant pool, which stores constants found while parsing. */
+struct pm_constant_pool_t {
+    /* The buckets in the hash map. */
+    pm_constant_pool_bucket_t *buckets;
+
+    /* The constants that are stored in the buckets. */
+    pm_constant_t *constants;
+
+    /* The number of buckets in the hash map. */
+    uint32_t size;
+
+    /* The number of buckets that have been allocated in the hash map. */
+    uint32_t capacity;
+};
+
+/*
+ * When we allocate constants into the pool, we reserve 0 to mean that the slot
+ * is not yet filled. This constant is reused in other places to indicate the
+ * lack of a constant id.
+ */
+#define PM_CONSTANT_ID_UNSET 0
+
+/* Initialize a list of constant ids with a given capacity. */
+void pm_constant_id_list_init_capacity(pm_arena_t *arena, pm_constant_id_list_t *list, size_t capacity);
+
+/* Insert a constant id into a list of constant ids at the specified index. */
+void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id);
+
+/* Checks if the current constant id list includes the given constant id. */
+bool pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id);
+
+/* Initialize a new constant pool with a given capacity. */
+void pm_constant_pool_init(pm_arena_t *arena, pm_constant_pool_t *pool, uint32_t capacity);
+
+/* Return a pointer to the constant indicated by the given constant id. */
+pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id);
+
+/*
+ * Find a constant in a constant pool. Returns the id of the constant, or 0 if
+ * the constant is not found.
+ */
+pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length);
+
+/*
+ * Insert a constant into a constant pool that is a slice of a source string.
+ * Returns the id of the constant, or 0 if any potential calls to resize fail.
+ */
+pm_constant_id_t pm_constant_pool_insert_shared(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length);
+
+/*
+ * Insert a constant into a constant pool from memory that is now owned by the
+ * constant pool. Returns the id of the constant, or 0 if any potential calls to
+ * resize fail.
+ */
+pm_constant_id_t pm_constant_pool_insert_owned(pm_arena_t *arena, pm_constant_pool_t *pool, uint8_t *start, size_t length);
+
+/*
+ * Insert a constant into a constant pool from memory that is constant. Returns
+ * the id of the constant, or 0 if any potential calls to resize fail.
+ */
+pm_constant_id_t pm_constant_pool_insert_constant(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length);
+
+#endif
diff --git a/prism/encoding.h b/prism/internal/encoding.h
index 5f7724821f..62392ef970 100644
--- a/prism/encoding.h
+++ b/prism/internal/encoding.h
@@ -1,128 +1,95 @@
-/**
- * @file encoding.h
- *
- * The encoding interface and implementations used by the parser.
- */
-#ifndef PRISM_ENCODING_H
-#define PRISM_ENCODING_H
-
-#include "prism/defines.h"
-#include "prism/util/pm_strncasecmp.h"
+#ifndef PRISM_INTERNAL_ENCODING_H
+#define PRISM_INTERNAL_ENCODING_H
 
-#include <assert.h>
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
 
-/**
+/*
  * This struct defines the functions necessary to implement the encoding
  * interface so we can determine how many bytes the subsequent character takes.
  * Each callback should return the number of bytes, or 0 if the next bytes are
  * invalid for the encoding and type.
  */
 typedef struct {
-    /**
+    /*
      * Return the number of bytes that the next character takes if it is valid
      * in the encoding. Does not read more than n bytes. It is assumed that n is
      * at least 1.
      */
     size_t (*char_width)(const uint8_t *b, ptrdiff_t n);
 
-    /**
+    /*
      * Return the number of bytes that the next character takes if it is valid
      * in the encoding and is alphabetical. Does not read more than n bytes. It
      * is assumed that n is at least 1.
      */
     size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);
 
-    /**
+    /*
      * Return the number of bytes that the next character takes if it is valid
      * in the encoding and is alphanumeric. Does not read more than n bytes. It
      * is assumed that n is at least 1.
      */
     size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);
 
-    /**
+    /*
      * Return true if the next character is valid in the encoding and is an
      * uppercase character. Does not read more than n bytes. It is assumed that
      * n is at least 1.
      */
     bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);
 
-    /**
+    /*
      * The name of the encoding. This should correspond to a value that can be
      * passed to Encoding.find in Ruby.
      */
     const char *name;
 
-    /**
-     * Return true if the encoding is a multibyte encoding.
-     */
+    /* Return true if the encoding is a multibyte encoding. */
     bool multibyte;
 } pm_encoding_t;
 
-/**
+/*
  * All of the lookup tables use the first bit of each embedded byte to indicate
  * whether the codepoint is alphabetical.
  */
 #define PRISM_ENCODING_ALPHABETIC_BIT 1 << 0
 
-/**
+/*
  * All of the lookup tables use the second bit of each embedded byte to indicate
  * whether the codepoint is alphanumeric.
  */
 #define PRISM_ENCODING_ALPHANUMERIC_BIT 1 << 1
 
-/**
+/*
  * All of the lookup tables use the third bit of each embedded byte to indicate
  * whether the codepoint is uppercase.
  */
 #define PRISM_ENCODING_UPPERCASE_BIT 1 << 2
 
-/**
- * Return the size of the next character in the UTF-8 encoding.
- *
- * @param b The bytes to read.
- * @param n The number of bytes that can be read.
- * @returns The number of bytes that the next character takes if it is valid in
- *     the encoding, or 0 if it is not.
- */
+/* Return the size of the next character in the UTF-8 encoding. */
 size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n);
 
-/**
+/*
  * Return the size of the next character in the UTF-8 encoding if it is an
  * alphabetical character.
- *
- * @param b The bytes to read.
- * @param n The number of bytes that can be read.
- * @returns The number of bytes that the next character takes if it is valid in
- *     the encoding, or 0 if it is not.
  */
 size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
 
-/**
+/*
  * Return the size of the next character in the UTF-8 encoding if it is an
  * alphanumeric character.
- *
- * @param b The bytes to read.
- * @param n The number of bytes that can be read.
- * @returns The number of bytes that the next character takes if it is valid in
- *     the encoding, or 0 if it is not.
  */
 size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);
 
-/**
+/*
  * Return true if the next character in the UTF-8 encoding if it is an uppercase
  * character.
- *
- * @param b The bytes to read.
- * @param n The number of bytes that can be read.
- * @returns True if the next character is valid in the encoding and is an
- *     uppercase character, or false if it is not.
  */
 bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
 
-/**
+/*
  * This lookup table is referenced in both the UTF-8 encoding file and the
  * parser directly in order to speed up the default encoding processing. It is
  * used to indicate whether a character is alphabetical, alphanumeric, or
@@ -130,9 +97,7 @@ bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
  */
 extern const uint8_t pm_encoding_unicode_table[256];
 
-/**
- * These are all of the encodings that prism supports.
- */
+/* These are all of the encodings that prism supports. */
 typedef enum {
     PM_ENCODING_UTF_8 = 0,
     PM_ENCODING_US_ASCII,
@@ -140,8 +105,8 @@ typedef enum {
     PM_ENCODING_EUC_JP,
     PM_ENCODING_WINDOWS_31J,
 
-// We optionally support excluding the full set of encodings to only support the
-// minimum necessary to process Ruby code without encoding comments.
+/* We optionally support excluding the full set of encodings to only support the
+ * minimum necessary to process Ruby code without encoding comments. */
 #ifndef PRISM_ENCODING_EXCLUDE_FULL
     PM_ENCODING_BIG5,
     PM_ENCODING_BIG5_HKSCS,
@@ -233,50 +198,44 @@ typedef enum {
     PM_ENCODING_MAXIMUM
 } pm_encoding_type_t;
 
-/**
- * This is the table of all of the encodings that prism supports.
- */
+/* This is the table of all of the encodings that prism supports. */
 extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
 
-/**
+/*
  * This is the default UTF-8 encoding. We need a reference to it to quickly
  * create parsers.
  */
 #define PM_ENCODING_UTF_8_ENTRY (&pm_encodings[PM_ENCODING_UTF_8])
 
-/**
+/*
  * This is the US-ASCII encoding. We need a reference to it to be able to
  * compare against it when a string is being created because it could possibly
  * need to fall back to ASCII-8BIT.
  */
 #define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])
 
-/**
+/*
  * This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk
  * can compare against it because invalid multibyte characters are not a thing
  * in this encoding. It is also needed for handling Regexp encoding flags.
  */
 #define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])
 
-/**
+/*
  * This is the EUC-JP encoding. We need a reference to it to quickly process
  * regular expression modifiers.
  */
 #define PM_ENCODING_EUC_JP_ENTRY (&pm_encodings[PM_ENCODING_EUC_JP])
 
-/**
+/*
  * This is the Windows-31J encoding. We need a reference to it to quickly
  * process regular expression modifiers.
  */
 #define PM_ENCODING_WINDOWS_31J_ENTRY (&pm_encodings[PM_ENCODING_WINDOWS_31J])
 
-/**
+/*
  * Parse the given name of an encoding and return a pointer to the corresponding
  * encoding struct if one can be found, otherwise return NULL.
- *
- * @param start A pointer to the first byte of the name.
- * @param end A pointer to the last byte of the name.
- * @returns A pointer to the encoding struct if one is found, otherwise NULL.
  */
 const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end);
 
diff --git a/prism/internal/integer.h b/prism/internal/integer.h
new file mode 100644
index 0000000000..7c9767e323
--- /dev/null
+++ b/prism/internal/integer.h
@@ -0,0 +1,68 @@
+/*
+ * This module provides functions for working with arbitrary-sized integers.
+ */
+#ifndef PRISM_INTERNAL_INTEGER_H
+#define PRISM_INTERNAL_INTEGER_H
+
+#include "prism/buffer.h"
+#include "prism/integer.h"
+
+#include <stdint.h>
+
+/*
+ * An enum controlling the base of an integer. It is expected that the base is
+ * already known before parsing the integer, even though it could be derived
+ * from the string itself.
+ */
+typedef enum {
+    /* The default decimal base, with no prefix. Leading 0s will be ignored. */
+    PM_INTEGER_BASE_DEFAULT,
+
+    /* The binary base, indicated by a 0b or 0B prefix. */
+    PM_INTEGER_BASE_BINARY,
+
+    /* The octal base, indicated by a 0, 0o, or 0O prefix. */
+    PM_INTEGER_BASE_OCTAL,
+
+    /* The decimal base, indicated by a 0d, 0D, or empty prefix. */
+    PM_INTEGER_BASE_DECIMAL,
+
+    /* The hexadecimal base, indicated by a 0x or 0X prefix. */
+    PM_INTEGER_BASE_HEXADECIMAL,
+
+    /*
+     * An unknown base, in which case pm_integer_parse will derive it based on
+     * the content of the string. This is less efficient and does more
+     * comparisons, so if callers know the base ahead of time, they should use
+     * that instead.
+     */
+    PM_INTEGER_BASE_UNKNOWN
+} pm_integer_base_t;
+
+/*
+ * Parse an integer from a string. This assumes that the format of the integer
+ * has already been validated, as internal validation checks are not performed
+ * here.
+ */
+void pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end);
+
+/*
+ * Compare two integers. This function returns -1 if the left integer is less
+ * than the right integer, 0 if they are equal, and 1 if the left integer is
+ * greater than the right integer.
+ */
+int pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right);
+
+/*
+ * Reduce a ratio of integers to its simplest form.
+ *
+ * If either the numerator or denominator do not fit into a 32-bit integer, then
+ * this function is a no-op. In the future, we may consider reducing even the
+ * larger numbers, but for now we're going to keep it simple.
+ */
+void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator);
+
+/* Convert an integer to a decimal string. */
+void pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer);
+
+#endif
diff --git a/prism/internal/isinf.h b/prism/internal/isinf.h
new file mode 100644
index 0000000000..41c160f56d
--- /dev/null
+++ b/prism/internal/isinf.h
@@ -0,0 +1,16 @@
+#ifndef PRISM_INTERNAL_ISINF_H
+#define PRISM_INTERNAL_ISINF_H
+
+/*
+ * isinf on POSIX systems accepts a float, a double, or a long double. But mingw
+ * didn't provide an isinf macro, only an isinf function that only accepts
+ * floats, so we need to use _finite instead.
+ */
+#ifdef __MINGW64__
+    #include <float.h>
+    #define PRISM_ISINF(x) (!_finite(x))
+#else
+    #define PRISM_ISINF(x) isinf(x)
+#endif
+
+#endif
diff --git a/prism/internal/line_offset_list.h b/prism/internal/line_offset_list.h
new file mode 100644
index 0000000000..dac9f7052e
--- /dev/null
+++ b/prism/internal/line_offset_list.h
@@ -0,0 +1,34 @@
+#ifndef PRISM_INTERNAL_LINE_OFFSET_LIST_H
+#define PRISM_INTERNAL_LINE_OFFSET_LIST_H
+
+#include "prism/compiler/force_inline.h"
+
+#include "prism/arena.h"
+#include "prism/line_offset_list.h"
+
+/* Initialize a new line offset list with the given capacity. */
+void pm_line_offset_list_init(pm_arena_t *arena, pm_line_offset_list_t *list, size_t capacity);
+
+/* Clear out the offsets that have been appended to the list. */
+void pm_line_offset_list_clear(pm_line_offset_list_t *list);
+
+/* Append a new offset to the list (slow path with resize). */
+void pm_line_offset_list_append_slow(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor);
+
+/* Append a new offset to the list. */
+static PRISM_FORCE_INLINE void
+pm_line_offset_list_append(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor) {
+    if (list->size < list->capacity) {
+        list->offsets[list->size++] = cursor;
+    } else {
+        pm_line_offset_list_append_slow(arena, list, cursor);
+    }
+}
+
+/*
+ * Returns the line of the given offset. If the offset is not in the list, the
+ * line of the closest offset less than the given offset is returned.
+ */
+int32_t pm_line_offset_list_line(const pm_line_offset_list_t *list, uint32_t cursor, int32_t start_line);
+
+#endif
diff --git a/prism/util/pm_list.h b/prism/internal/list.h
index 3512dee979..0ab59ef32a 100644
--- a/prism/util/pm_list.h
+++ b/prism/internal/list.h
@@ -1,19 +1,9 @@
-/**
- * @file pm_list.h
- *
- * An abstract linked list.
- */
-#ifndef PRISM_LIST_H
-#define PRISM_LIST_H
+#ifndef PRISM_INTERNAL_LIST_H
+#define PRISM_INTERNAL_LIST_H
 
-#include "prism/defines.h"
-
-#include <stdbool.h>
 #include <stddef.h>
-#include <stdint.h>
-#include <stdlib.h>
 
-/**
+/*
  * This struct represents an abstract linked list that provides common
  * functionality. It is meant to be used any time a linked list is necessary to
  * store data.
@@ -44,54 +34,29 @@
  * iteration and appending of new nodes.
  */
 typedef struct pm_list_node {
-    /** A pointer to the next node in the list. */
+    /* A pointer to the next node in the list. */
     struct pm_list_node *next;
 } pm_list_node_t;
 
-/**
+/*
  * This represents the overall linked list. It keeps a pointer to the head and
  * tail so that iteration is easy and pushing new nodes is easy.
  */
 typedef struct {
-    /** The size of the list. */
+    /* The size of the list. */
     size_t size;
 
-    /** A pointer to the head of the list. */
+    /* A pointer to the head of the list. */
     pm_list_node_t *head;
 
-    /** A pointer to the tail of the list. */
+    /* A pointer to the tail of the list. */
     pm_list_node_t *tail;
 } pm_list_t;
 
-/**
- * Returns true if the given list is empty.
- *
- * @param list The list to check.
- * @return True if the given list is empty, otherwise false.
- */
-PRISM_EXPORTED_FUNCTION bool pm_list_empty_p(pm_list_t *list);
+/* Returns the size of the list. */
+size_t pm_list_size(pm_list_t *list);
 
-/**
- * Returns the size of the list.
- *
- * @param list The list to check.
- * @return The size of the list.
- */
-PRISM_EXPORTED_FUNCTION size_t pm_list_size(pm_list_t *list);
-
-/**
- * Append a node to the given list.
- *
- * @param list The list to append to.
- * @param node The node to append.
- */
+/* Append a node to the given list. */
 void pm_list_append(pm_list_t *list, pm_list_node_t *node);
 
-/**
- * Deallocate the internal state of the given list.
- *
- * @param list The list to free.
- */
-PRISM_EXPORTED_FUNCTION void pm_list_free(pm_list_t *list);
-
 #endif
diff --git a/prism/internal/magic_comments.h b/prism/internal/magic_comments.h
new file mode 100644
index 0000000000..72a581c5d7
--- /dev/null
+++ b/prism/internal/magic_comments.h
@@ -0,0 +1,23 @@
+#ifndef PRISM_INTERNAL_MAGIC_COMMENTS_H
+#define PRISM_INTERNAL_MAGIC_COMMENTS_H
+
+#include "prism/magic_comments.h"
+
+#include "prism/internal/list.h"
+
+/*
+ * This is a node in the linked list of magic comments that we've found while
+ * parsing.
+ */
+struct pm_magic_comment_t {
+    /* The embedded base node. */
+    pm_list_node_t node;
+
+    /* The key of the magic comment. */
+    pm_location_t key;
+
+    /* The value of the magic comment. */
+    pm_location_t value;
+};
+
+#endif
diff --git a/prism/internal/memchr.h b/prism/internal/memchr.h
new file mode 100644
index 0000000000..6f6b0bca30
--- /dev/null
+++ b/prism/internal/memchr.h
@@ -0,0 +1,15 @@
+#ifndef PRISM_INTERNAL_MEMCHR_H
+#define PRISM_INTERNAL_MEMCHR_H
+
+#include "prism/internal/encoding.h"
+
+#include <stddef.h>
+
+/*
+ * We need to roll our own memchr to handle cases where the encoding changes and
+ * we need to search for a character in a buffer that could be the trailing byte
+ * of a multibyte character.
+ */
+const void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding);
+
+#endif
diff --git a/prism/internal/node.h b/prism/internal/node.h
new file mode 100644
index 0000000000..ca6d5616d7
--- /dev/null
+++ b/prism/internal/node.h
@@ -0,0 +1,32 @@
+#ifndef PRISM_INTERNAL_NODE_H
+#define PRISM_INTERNAL_NODE_H
+
+#include "prism/node.h"
+
+#include "prism/compiler/force_inline.h"
+
+#include "prism/arena.h"
+
+/*
+ * Slow path for pm_node_list_append: grow the list and append the node.
+ * Do not call directly — use pm_node_list_append instead.
+ */
+void pm_node_list_append_slow(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node);
+
+/* Append a new node onto the end of the node list. */
+static PRISM_FORCE_INLINE void
+pm_node_list_append(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) {
+    if (list->size < list->capacity) {
+        list->nodes[list->size++] = node;
+    } else {
+        pm_node_list_append_slow(arena, list, node);
+    }
+}
+
+/* Prepend a new node onto the beginning of the node list. */
+void pm_node_list_prepend(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node);
+
+/* Concatenate the given node list onto the end of the other node list. */
+void pm_node_list_concat(pm_arena_t *arena, pm_node_list_t *list, pm_node_list_t *other);
+
+#endif
diff --git a/prism/internal/options.h b/prism/internal/options.h
new file mode 100644
index 0000000000..7e37742a8b
--- /dev/null
+++ b/prism/internal/options.h
@@ -0,0 +1,212 @@
+#ifndef PRISM_INTERNAL_OPTIONS_H
+#define PRISM_INTERNAL_OPTIONS_H
+
+#include "prism/options.h"
+
+/* A scope of locals surrounding the code that is being parsed. */
+struct pm_options_scope_t {
+    /* The number of locals in the scope. */
+    size_t locals_count;
+
+    /* The names of the locals in the scope. */
+    pm_string_t *locals;
+
+    /* Flags for the set of forwarding parameters in this scope. */
+    uint8_t forwarding;
+};
+
+/*
+ * The version of Ruby syntax that we should be parsing with. This is used to
+ * allow consumers to specify which behavior they want in case they need to
+ * parse in the same way as a specific version of CRuby would have.
+ */
+typedef enum {
+    /*
+     * If an explicit version is not provided, the current version of prism will
+     * be used.
+     */
+    PM_OPTIONS_VERSION_UNSET = 0,
+
+    /* The vendored version of prism in CRuby 3.3.x. */
+    PM_OPTIONS_VERSION_CRUBY_3_3 = 1,
+
+    /* The vendored version of prism in CRuby 3.4.x. */
+    PM_OPTIONS_VERSION_CRUBY_3_4 = 2,
+
+    /* Same value as PM_OPTIONS_VERSION_CRUBY_4_0 (CRuby 4.0.x). */
+    PM_OPTIONS_VERSION_CRUBY_3_5 = 3,
+
+    /* The vendored version of prism in CRuby 4.0.x. */
+    PM_OPTIONS_VERSION_CRUBY_4_0 = 3,
+
+    /* The vendored version of prism in CRuby 4.1.x. */
+    PM_OPTIONS_VERSION_CRUBY_4_1 = 4,
+
+    /* The current version of prism. */
+    PM_OPTIONS_VERSION_LATEST = PM_OPTIONS_VERSION_CRUBY_4_1
+} pm_options_version_t;
+
+/* The options that can be passed to the parser. */
+struct pm_options_t {
+    /*
+     * The callback to call when additional switches are found in a shebang
+     * comment.
+     */
+    pm_options_shebang_callback_t shebang_callback;
+
+    /*
+     * Any additional data that should be passed along to the shebang callback
+     * if one was set.
+     */
+    void *shebang_callback_data;
+
+    /* The name of the file that is currently being parsed. */
+    pm_string_t filepath;
+
+    /*
+     * The line within the file that the parse starts on. This value is
+     * 1-indexed.
+     */
+    int32_t line;
+
+    /*
+     * The name of the encoding that the source file is in. Note that this must
+     * correspond to a name that can be found with Encoding.find in Ruby.
+     */
+    pm_string_t encoding;
+
+    /* The number of scopes surrounding the code that is being parsed. */
+    size_t scopes_count;
+
+    /*
+     * The scopes surrounding the code that is being parsed. For most parses
+     * this will be NULL, but for evals it will be the locals that are in scope
+     * surrounding the eval. Scopes are ordered from the outermost scope to the
+     * innermost one.
+     */
+    pm_options_scope_t *scopes;
+
+    /*
+     * The version of prism that we should be parsing with. This is used to
+     * allow consumers to specify which behavior they want in case they need to
+     * parse exactly as a specific version of CRuby.
+     */
+    pm_options_version_t version;
+
+    /* A bitset of the various options that were set on the command line. */
+    uint8_t command_line;
+
+    /*
+     * Whether or not the frozen string literal option has been set.
+     * May be:
+     *  - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
+     *  - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
+     *  - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
+     */
+    int8_t frozen_string_literal;
+
+    /*
+     * Whether or not the encoding magic comments should be respected. This is a
+     * niche use-case where you want to parse a file with a specific encoding
+     * but ignore any encoding magic comments at the top of the file.
+     */
+    bool encoding_locked;
+
+    /*
+     * When the file being parsed is the main script, the shebang will be
+     * considered for command-line flags (or for implicit -x). The caller needs
+     * to pass this information to the parser so that it can behave correctly.
+     */
+    bool main_script;
+
+    /*
+     * When the file being parsed is considered a "partial" script, jumps will
+     * not be marked as errors if they are not contained within loops/blocks.
+     * This is used in the case that you're parsing a script that you know will
+     * be embedded inside another script later, but you do not have that context
+     * yet. For example, when parsing an ERB template that will be evaluated
+     * inside another script.
+     */
+    bool partial_script;
+
+    /*
+     * Whether or not the parser should freeze the nodes that it creates. This
+     * makes it possible to have a deeply frozen AST that is safe to share
+     * between concurrency primitives.
+     */
+    bool freeze;
+};
+
+/* Free the internal memory associated with the options. */
+void pm_options_cleanup(pm_options_t *options);
+
+/*
+ * Deserialize an options struct from the given binary string. This is used to
+ * pass options to the parser from an FFI call so that consumers of the library
+ * from an FFI perspective don't have to worry about the structure of our
+ * options structs. Since the source of these calls will be from Ruby
+ * implementation internals we assume it is from a trusted source.
+ *
+ * `data` is assumed to be a valid pointer pointing to well-formed data. The
+ * layout of this data should be the same every time, and is described below:
+ *
+ * | # bytes | field                      |
+ * | ------- | -------------------------- |
+ * | `4`     | the length of the filepath |
+ * | ...     | the filepath bytes         |
+ * | `4`     | the line number            |
+ * | `4`     | the length of the encoding |
+ * | ...     | the encoding bytes         |
+ * | `1`     | frozen string literal      |
+ * | `1`     | -p command line option     |
+ * | `1`     | -n command line option     |
+ * | `1`     | -l command line option     |
+ * | `1`     | -a command line option     |
+ * | `1`     | the version                |
+ * | `1`     | encoding locked            |
+ * | `1`     | main script                |
+ * | `1`     | partial script             |
+ * | `1`     | freeze                     |
+ * | `4`     | the number of scopes       |
+ * | ...     | the scopes                 |
+ *
+ * The version field is an enum, so it should be one of the following values:
+ *
+ * | value | version                   |
+ * | ----- | ------------------------- |
+ * | `0`   | use the latest version of prism |
+ * | `1`   | use the version of prism that is vendored in CRuby 3.3.0 |
+ * | `2`   | use the version of prism that is vendored in CRuby 3.4.0 |
+ * | `3`   | use the version of prism that is vendored in CRuby 4.0.0 |
+ * | `4`   | use the version of prism that is vendored in CRuby 4.1.0 |
+ *
+ * Each scope is laid out as follows:
+ *
+ * | # bytes | field                      |
+ * | ------- | -------------------------- |
+ * | `4`     | the number of locals       |
+ * | `1`     | the forwarding flags       |
+ * | ...     | the locals                 |
+ *
+ * Each local is laid out as follows:
+ *
+ * | # bytes | field                      |
+ * | ------- | -------------------------- |
+ * | `4`     | the length of the local    |
+ * | ...     | the local bytes            |
+ *
+ * Some additional things to note about this layout:
+ *
+ * * The filepath can have a length of 0, in which case we'll consider it an
+ *   empty string.
+ * * The line number should be 0-indexed.
+ * * The encoding can have a length of 0, in which case we'll use the default
+ *   encoding (UTF-8). If it's not 0, it should correspond to a name of an
+ *   encoding that can be passed to `Encoding.find` in Ruby.
+ * * The frozen string literal, encoding locked, main script, and partial script
+ *   fields are booleans, so their values should be either 0 or 1.
+ * * The number of scopes can be 0.
+ */
+void pm_options_read(pm_options_t *options, const char *data);
+
+#endif
diff --git a/prism/internal/parser.h b/prism/internal/parser.h
new file mode 100644
index 0000000000..4320cf4029
--- /dev/null
+++ b/prism/internal/parser.h
@@ -0,0 +1,958 @@
+#ifndef PRISM_INTERNAL_PARSER_H
+#define PRISM_INTERNAL_PARSER_H
+
+#include "prism/compiler/accel.h"
+
+#include "prism/internal/arena.h"
+#include "prism/internal/constant_pool.h"
+#include "prism/internal/encoding.h"
+#include "prism/internal/list.h"
+#include "prism/internal/options.h"
+#include "prism/internal/static_literals.h"
+#include "prism/internal/strpbrk.h"
+
+#include "prism/ast.h"
+#include "prism/line_offset_list.h"
+#include "prism/parser.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/*
+ * This enum provides various bits that represent different kinds of states that
+ * the lexer can track. This is used to determine which kind of token to return
+ * based on the context of the parser.
+ */
+typedef enum {
+    PM_LEX_STATE_BIT_BEG,
+    PM_LEX_STATE_BIT_END,
+    PM_LEX_STATE_BIT_ENDARG,
+    PM_LEX_STATE_BIT_ENDFN,
+    PM_LEX_STATE_BIT_ARG,
+    PM_LEX_STATE_BIT_CMDARG,
+    PM_LEX_STATE_BIT_MID,
+    PM_LEX_STATE_BIT_FNAME,
+    PM_LEX_STATE_BIT_DOT,
+    PM_LEX_STATE_BIT_CLASS,
+    PM_LEX_STATE_BIT_LABEL,
+    PM_LEX_STATE_BIT_LABELED,
+    PM_LEX_STATE_BIT_FITEM
+} pm_lex_state_bit_t;
+
+/*
+ * This enum combines the various bits from the above enum into individual
+ * values that represent the various states of the lexer.
+ */
+typedef enum {
+    PM_LEX_STATE_NONE = 0,
+    PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG),
+    PM_LEX_STATE_END = (1 << PM_LEX_STATE_BIT_END),
+    PM_LEX_STATE_ENDARG = (1 << PM_LEX_STATE_BIT_ENDARG),
+    PM_LEX_STATE_ENDFN = (1 << PM_LEX_STATE_BIT_ENDFN),
+    PM_LEX_STATE_ARG = (1 << PM_LEX_STATE_BIT_ARG),
+    PM_LEX_STATE_CMDARG = (1 << PM_LEX_STATE_BIT_CMDARG),
+    PM_LEX_STATE_MID = (1 << PM_LEX_STATE_BIT_MID),
+    PM_LEX_STATE_FNAME = (1 << PM_LEX_STATE_BIT_FNAME),
+    PM_LEX_STATE_DOT = (1 << PM_LEX_STATE_BIT_DOT),
+    PM_LEX_STATE_CLASS = (1 << PM_LEX_STATE_BIT_CLASS),
+    PM_LEX_STATE_LABEL = (1 << PM_LEX_STATE_BIT_LABEL),
+    PM_LEX_STATE_LABELED = (1 << PM_LEX_STATE_BIT_LABELED),
+    PM_LEX_STATE_FITEM = (1 << PM_LEX_STATE_BIT_FITEM),
+    PM_LEX_STATE_BEG_ANY = PM_LEX_STATE_BEG | PM_LEX_STATE_MID | PM_LEX_STATE_CLASS,
+    PM_LEX_STATE_ARG_ANY = PM_LEX_STATE_ARG | PM_LEX_STATE_CMDARG,
+    PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN
+} pm_lex_state_t;
+
+/*
+ * The type of quote that a heredoc uses.
+ */
+typedef enum {
+    PM_HEREDOC_QUOTE_NONE,
+    PM_HEREDOC_QUOTE_SINGLE = '\'',
+    PM_HEREDOC_QUOTE_DOUBLE = '"',
+    PM_HEREDOC_QUOTE_BACKTICK = '`',
+} pm_heredoc_quote_t;
+
+/*
+ * The type of indentation that a heredoc uses.
+ */
+typedef enum {
+    PM_HEREDOC_INDENT_NONE,
+    PM_HEREDOC_INDENT_DASH,
+    PM_HEREDOC_INDENT_TILDE,
+} pm_heredoc_indent_t;
+
+/*
+ * All of the information that must be stored in order to lex a heredoc.
+ */
+typedef struct {
+    /* A pointer to the start of the heredoc identifier. */
+    const uint8_t *ident_start;
+
+    /* The length of the heredoc identifier. */
+    size_t ident_length;
+
+    /* The type of quote that the heredoc uses. */
+    pm_heredoc_quote_t quote;
+
+    /* The type of indentation that the heredoc uses. */
+    pm_heredoc_indent_t indent;
+} pm_heredoc_lex_mode_t;
+
+/*
+ * When lexing Ruby source, the lexer has a small amount of state to tell which
+ * kind of token it is currently lexing. For example, when we find the start of
+ * a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
+ * that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
+ * are found as part of a string.
+ */
+typedef struct pm_lex_mode {
+    /* The type of this lex mode. */
+    enum {
+        /* This state is used when any given token is being lexed. */
+        PM_LEX_DEFAULT,
+
+        /*
+         * This state is used when we're lexing as normal but inside an embedded
+         * expression of a string.
+         */
+        PM_LEX_EMBEXPR,
+
+        /*
+         * This state is used when we're lexing a variable that is embedded
+         * directly inside of a string with the # shorthand.
+         */
+        PM_LEX_EMBVAR,
+
+        /* This state is used when you are inside the content of a heredoc. */
+        PM_LEX_HEREDOC,
+
+        /*
+         * This state is used when we are lexing a list of tokens, as in a %w
+         * word list literal or a %i symbol list literal.
+         */
+        PM_LEX_LIST,
+
+        /*
+         * This state is used when a regular expression has been begun and we
+         * are looking for the terminator.
+         */
+        PM_LEX_REGEXP,
+
+        /*
+         * This state is used when we are lexing a string or a string-like
+         * token, as in string content with either quote or an xstring.
+         */
+        PM_LEX_STRING
+    } mode;
+
+    /* The data associated with this type of lex mode. */
+    union {
+        struct {
+            /* This keeps track of the nesting level of the list. */
+            size_t nesting;
+
+            /* Whether or not interpolation is allowed in this list. */
+            bool interpolation;
+
+            /*
+             * When lexing a list, it takes into account balancing the
+             * terminator if the terminator is one of (), [], {}, or <>.
+             */
+            uint8_t incrementor;
+
+            /* This is the terminator of the list literal. */
+            uint8_t terminator;
+
+            /*
+             * This is the character set that should be used to delimit the
+             * tokens within the list.
+             */
+            uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE];
+        } list;
+
+        struct {
+            /*
+             * This keeps track of the nesting level of the regular expression.
+             */
+            size_t nesting;
+
+            /*
+             * When lexing a regular expression, it takes into account balancing
+             * the terminator if the terminator is one of (), [], {}, or <>.
+             */
+            uint8_t incrementor;
+
+            /* This is the terminator of the regular expression. */
+            uint8_t terminator;
+
+            /*
+             * This is the character set that should be used to delimit the
+             * tokens within the regular expression.
+             */
+            uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE];
+        } regexp;
+
+        struct {
+            /* This keeps track of the nesting level of the string. */
+            size_t nesting;
+
+            /* Whether or not interpolation is allowed in this string. */
+            bool interpolation;
+
+            /*
+             * Whether or not at the end of the string we should allow a :,
+             * which would indicate this was a dynamic symbol instead of a
+             * string.
+             */
+            bool label_allowed;
+
+            /*
+             * When lexing a string, it takes into account balancing the
+             * terminator if the terminator is one of (), [], {}, or <>.
+             */
+            uint8_t incrementor;
+
+            /*
+             * This is the terminator of the string. It is typically either a
+             * single or double quote.
+             */
+            uint8_t terminator;
+
+            /*
+             * This is the character set that should be used to delimit the
+             * tokens within the string.
+             */
+            uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE];
+        } string;
+
+        struct {
+            /*
+             * All of the data necessary to lex a heredoc.
+             */
+            pm_heredoc_lex_mode_t base;
+
+            /*
+             * This is the pointer to the character where lexing should resume
+             * once the heredoc has been completely processed.
+             */
+            const uint8_t *next_start;
+
+            /*
+             * This is used to track the amount of common whitespace on each
+             * line so that we know how much to dedent each line in the case of
+             * a tilde heredoc.
+             */
+            size_t *common_whitespace;
+
+            /* True if the previous token ended with a line continuation. */
+            bool line_continuation;
+        } heredoc;
+    } as;
+
+    /* The previous lex state so that it knows how to pop. */
+    struct pm_lex_mode *prev;
+} pm_lex_mode_t;
+
+/*
+ * We pre-allocate a certain number of lex states in order to avoid having to
+ * call malloc too many times while parsing. You really shouldn't need more than
+ * this because you only really nest deeply when doing string interpolation.
+ */
+#define PM_LEX_STACK_SIZE 4
+
+/*
+ * While parsing, we keep track of a stack of contexts. This is helpful for
+ * error recovery so that we can pop back to a previous context when we hit a
+ * token that is understood by a parent context but not by the current context.
+ */
+typedef enum {
+    /* a null context, used for returning a value from a function */
+    PM_CONTEXT_NONE = 0,
+
+    /* a begin statement */
+    PM_CONTEXT_BEGIN,
+
+    /* an ensure statement with an explicit begin */
+    PM_CONTEXT_BEGIN_ENSURE,
+
+    /* a rescue else statement with an explicit begin */
+    PM_CONTEXT_BEGIN_ELSE,
+
+    /* a rescue statement with an explicit begin */
+    PM_CONTEXT_BEGIN_RESCUE,
+
+    /* expressions in block arguments using braces */
+    PM_CONTEXT_BLOCK_BRACES,
+
+    /* expressions in block arguments using do..end */
+    PM_CONTEXT_BLOCK_KEYWORDS,
+
+    /* an ensure statement within a do..end block */
+    PM_CONTEXT_BLOCK_ENSURE,
+
+    /* a rescue else statement within a do..end block */
+    PM_CONTEXT_BLOCK_ELSE,
+
+    /* expressions in block parameters `foo do |...| end` */
+    PM_CONTEXT_BLOCK_PARAMETERS,
+
+    /* a rescue statement within a do..end block */
+    PM_CONTEXT_BLOCK_RESCUE,
+
+    /* a case when statement */
+    PM_CONTEXT_CASE_WHEN,
+
+    /* a case in statement */
+    PM_CONTEXT_CASE_IN,
+
+    /* a class declaration */
+    PM_CONTEXT_CLASS,
+
+    /* an ensure statement within a class statement */
+    PM_CONTEXT_CLASS_ENSURE,
+
+    /* a rescue else statement within a class statement */
+    PM_CONTEXT_CLASS_ELSE,
+
+    /* a rescue statement within a class statement */
+    PM_CONTEXT_CLASS_RESCUE,
+
+    /* a method definition */
+    PM_CONTEXT_DEF,
+
+    /* an ensure statement within a method definition */
+    PM_CONTEXT_DEF_ENSURE,
+
+    /* a rescue else statement within a method definition */
+    PM_CONTEXT_DEF_ELSE,
+
+    /* a rescue statement within a method definition */
+    PM_CONTEXT_DEF_RESCUE,
+
+    /* a method definition's parameters */
+    PM_CONTEXT_DEF_PARAMS,
+
+    /* a defined? expression */
+    PM_CONTEXT_DEFINED,
+
+    /* a method definition's default parameter */
+    PM_CONTEXT_DEFAULT_PARAMS,
+
+    /* an else clause */
+    PM_CONTEXT_ELSE,
+
+    /* an elsif clause */
+    PM_CONTEXT_ELSIF,
+
+    /* an interpolated expression */
+    PM_CONTEXT_EMBEXPR,
+
+    /* a for loop */
+    PM_CONTEXT_FOR,
+
+    /* a for loop's index */
+    PM_CONTEXT_FOR_INDEX,
+
+    /* an if statement */
+    PM_CONTEXT_IF,
+
+    /* a lambda expression with braces */
+    PM_CONTEXT_LAMBDA_BRACES,
+
+    /* a lambda expression with do..end */
+    PM_CONTEXT_LAMBDA_DO_END,
+
+    /* an ensure statement within a lambda expression */
+    PM_CONTEXT_LAMBDA_ENSURE,
+
+    /* a rescue else statement within a lambda expression */
+    PM_CONTEXT_LAMBDA_ELSE,
+
+    /* a rescue statement within a lambda expression */
+    PM_CONTEXT_LAMBDA_RESCUE,
+
+    /* the predicate clause of a loop statement */
+    PM_CONTEXT_LOOP_PREDICATE,
+
+    /* the top level context */
+    PM_CONTEXT_MAIN,
+
+    /* a module declaration */
+    PM_CONTEXT_MODULE,
+
+    /* an ensure statement within a module statement */
+    PM_CONTEXT_MODULE_ENSURE,
+
+    /* a rescue else statement within a module statement */
+    PM_CONTEXT_MODULE_ELSE,
+
+    /* a rescue statement within a module statement */
+    PM_CONTEXT_MODULE_RESCUE,
+
+    /* a multiple target expression */
+    PM_CONTEXT_MULTI_TARGET,
+
+    /* a parenthesized expression */
+    PM_CONTEXT_PARENS,
+
+    /* an END block */
+    PM_CONTEXT_POSTEXE,
+
+    /* a predicate inside an if/elsif/unless statement */
+    PM_CONTEXT_PREDICATE,
+
+    /* a BEGIN block */
+    PM_CONTEXT_PREEXE,
+
+    /* a modifier rescue clause */
+    PM_CONTEXT_RESCUE_MODIFIER,
+
+    /* a singleton class definition */
+    PM_CONTEXT_SCLASS,
+
+    /* an ensure statement with a singleton class */
+    PM_CONTEXT_SCLASS_ENSURE,
+
+    /* a rescue else statement with a singleton class */
+    PM_CONTEXT_SCLASS_ELSE,
+
+    /* a rescue statement with a singleton class */
+    PM_CONTEXT_SCLASS_RESCUE,
+
+    /* a ternary expression */
+    PM_CONTEXT_TERNARY,
+
+    /* an unless statement */
+    PM_CONTEXT_UNLESS,
+
+    /* an until statement */
+    PM_CONTEXT_UNTIL,
+
+    /* a while statement */
+    PM_CONTEXT_WHILE,
+} pm_context_t;
+
+/* This is a node in a linked list of contexts. */
+typedef struct pm_context_node {
+    /* The context that this node represents. */
+    pm_context_t context;
+
+    /* A pointer to the previous context in the linked list. */
+    struct pm_context_node *prev;
+} pm_context_node_t;
+
+/* The type of shareable constant value that can be set. */
+typedef uint8_t pm_shareable_constant_value_t;
+static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_NONE = 0x0;
+static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL;
+static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING;
+static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY;
+
+/*
+ * This tracks an individual local variable in a certain lexical context, as
+ * well as the number of times it is read.
+ */
+typedef struct {
+    /* The name of the local variable. */
+    pm_constant_id_t name;
+
+    /* The location of the local variable in the source. */
+    pm_location_t location;
+
+    /* The index of the local variable in the local table. */
+    uint32_t index;
+
+    /* The number of times the local variable is read. */
+    uint32_t reads;
+
+    /* The hash of the local variable. */
+    uint32_t hash;
+} pm_local_t;
+
+/*
+ * This is a set of local variables in a certain lexical context (method, class,
+ * module, etc.). We need to track how many times these variables are read in
+ * order to warn if they only get written.
+ */
+typedef struct pm_locals {
+    /* The number of local variables in the set. */
+    uint32_t size;
+
+    /* The capacity of the local variables set. */
+    uint32_t capacity;
+
+    /*
+     * A bloom filter over constant IDs stored in this set. Used to quickly
+     * reject lookups for names that are definitely not present, avoiding the
+     * cost of a linear scan or hash probe.
+     */
+    uint32_t bloom;
+
+    /* The nullable allocated memory for the local variables in the set. */
+    pm_local_t *locals;
+} pm_locals_t;
+
+/* The flags about scope parameters that can be set. */
+typedef uint8_t pm_scope_parameters_t;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NONE = 0x0;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x1;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x2;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x4;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x8;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED = 0x10;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_INNER = 0x20;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_FOUND = 0x40;
+
+/*
+ * This struct represents a node in a linked list of scopes. Some scopes can see
+ * into their parent scopes, while others cannot.
+ */
+typedef struct pm_scope {
+    /* A pointer to the previous scope in the linked list. */
+    struct pm_scope *previous;
+
+    /* The IDs of the locals in the given scope. */
+    pm_locals_t locals;
+
+    /*
+     * This is a list of the implicit parameters contained within the block.
+     * These will be processed after the block is parsed to determine the kind
+     * of parameters node that should be used and to check if any errors need to
+     * be added.
+     */
+    pm_node_list_t implicit_parameters;
+
+    /*
+     * This is a bitfield that indicates the parameters that are being used in
+     * this scope. It is a combination of the PM_SCOPE_PARAMETERS_* constants.
+     * There are three different kinds of parameters that can be used in a
+     * scope:
+     *
+     * - Ordinary parameters (e.g., def foo(bar); end)
+     * - Numbered parameters (e.g., def foo; _1; end)
+     * - The it parameter (e.g., def foo; it; end)
+     *
+     * If ordinary parameters are being used, then certain parameters can be
+     * forwarded to another method/structure. Those are indicated by four
+     * additional bits in the parameters field. For example, some combinations of:
+     *
+     * - def foo(*); end
+     * - def foo(**); end
+     * - def foo(&); end
+     * - def foo(...); end
+     */
+    pm_scope_parameters_t parameters;
+
+    /*
+     * The current state of constant shareability for this scope. This is
+     * changed by magic shareable_constant_value comments.
+     */
+    pm_shareable_constant_value_t shareable_constant;
+
+    /*
+     * A boolean indicating whether or not this scope can see into its parent.
+     * If closed is true, then the scope cannot see into its parent.
+     */
+    bool closed;
+} pm_scope_t;
+
+/*
+ * A stack of boolean values. Note that this is a plain uint32_t, not a struct.
+ */
+typedef uint32_t pm_state_stack_t;
+
+/*
+ * This struct represents the overall parser. It contains a reference to the
+ * source file, as well as pointers that indicate where in the source it's
+ * currently parsing. It also contains the most recent and current token that
+ * it's considering.
+ */
+struct pm_parser_t {
+    /* The arena used for all AST-lifetime allocations. Caller-owned. */
+    pm_arena_t *arena;
+
+    /* The arena used for parser metadata (comments, diagnostics, etc.). */
+    pm_arena_t metadata_arena;
+
+    /*
+     * The next node identifier that will be assigned. This is a unique
+     * identifier used to track nodes such that the syntax tree can be dropped
+     * but the node can be found through another parse.
+     */
+    uint32_t node_id;
+
+    /*
+     * A single-entry cache for pm_parser_constant_id_raw. Avoids redundant
+     * constant pool lookups when the same token is resolved multiple times
+     * (e.g., once during lexing for local variable detection, and again
+     * during parsing for node creation).
+     */
+    struct {
+        const uint8_t *start;
+        const uint8_t *end;
+        pm_constant_id_t id;
+    } constant_cache;
+
+    /* The current state of the lexer. */
+    pm_lex_state_t lex_state;
+
+    /* Tracks the current nesting of (), [], and {}. */
+    int enclosure_nesting;
+
+    /*
+     * Used to temporarily track the nesting of enclosures to determine if a {
+     * is the beginning of a lambda following the parameters of a lambda.
+     */
+    int lambda_enclosure_nesting;
+
+    /*
+     * Used to track the nesting of braces to ensure we get the correct value
+     * when we are interpolating blocks with braces.
+     */
+    int brace_nesting;
+
+    /*
+     * The stack used to determine if a do keyword belongs to the predicate of a
+     * while, until, or for loop.
+     */
+    pm_state_stack_t do_loop_stack;
+
+    /*
+     * The stack used to determine if a do keyword belongs to the beginning of a
+     * block.
+     */
+    pm_state_stack_t accepts_block_stack;
+
+    /* A stack of lex modes. */
+    struct {
+        /* The current mode of the lexer. */
+        pm_lex_mode_t *current;
+
+        /* The stack of lexer modes. */
+        pm_lex_mode_t stack[PM_LEX_STACK_SIZE];
+
+        /* The current index into the lexer mode stack. */
+        size_t index;
+    } lex_modes;
+
+    /* The pointer to the start of the source. */
+    const uint8_t *start;
+
+    /* The pointer to the end of the source. */
+    const uint8_t *end;
+
+    /* The previous token we were considering. */
+    pm_token_t previous;
+
+    /* The current token we're considering. */
+    pm_token_t current;
+
+    /*
+     * This is a special field set on the parser when we need the parser to jump
+     * to a specific location when lexing the next token, as opposed to just
+     * using the end of the previous token. Normally this is NULL.
+     */
+    const uint8_t *next_start;
+
+    /*
+     * This field indicates the end of a heredoc whose identifier was found on
+     * the current line. If another heredoc is found on the same line, then this
+     * will be moved forward to the end of that heredoc. If no heredocs are
+     * found on a line then this is NULL.
+     */
+    const uint8_t *heredoc_end;
+
+    /* The list of comments that have been found while parsing. */
+    pm_list_t comment_list;
+
+    /* The list of magic comments that have been found while parsing. */
+    pm_list_t magic_comment_list;
+
+    /*
+     * An optional location that represents the location of the __END__ marker
+     * and the rest of the content of the file. This content is loaded into the
+     * DATA constant when the file being parsed is the main file being executed.
+     */
+    pm_location_t data_loc;
+
+    /* The list of warnings that have been found while parsing. */
+    pm_list_t warning_list;
+
+    /* The list of errors that have been found while parsing. */
+    pm_list_t error_list;
+
+    /* The current local scope. */
+    pm_scope_t *current_scope;
+
+    /* The current parsing context. */
+    pm_context_node_t *current_context;
+
+    /*
+     * The hash keys for the hash that is currently being parsed. This is not
+     * usually necessary because the keys can be passed down the various call
+     * chains, but in the event that you're parsing a hash that is being directly
+     * pushed into another hash with **, we need to share the hash keys so that
+     * we can warn for the nested hash as well.
+     */
+    pm_static_literals_t *current_hash_keys;
+
+    /*
+     * The encoding functions for the current file are attached to the parser as
+     * it's parsing so that they can change with a magic comment.
+     */
+    const pm_encoding_t *encoding;
+
+    /*
+     * When the encoding that is being used to parse the source is changed by
+     * prism, we provide the ability here to call out to a user-defined
+     * function.
+     */
+    pm_encoding_changed_callback_t encoding_changed_callback;
+
+    /*
+     * This pointer indicates where a comment must start if it is to be
+     * considered an encoding comment.
+     */
+    const uint8_t *encoding_comment_start;
+
+    /*
+     * When you are lexing through a file, the lexer needs all of the information
+     * that the parser additionally provides (for example, the local table). So if
+     * you want to properly lex Ruby, you need to actually lex it in the context of
+     * the parser. In order to provide this functionality, we optionally allow a
+     * struct to be attached to the parser that calls back out to a user-provided
+     * callback when each token is lexed.
+     */
+    struct {
+        /*
+         * This is the callback that is called when a token is lexed. It is
+         * passed the opaque data pointer, the parser, and the token that was
+         * lexed.
+         */
+        pm_lex_callback_t callback;
+
+        /*
+         * This opaque pointer is used to provide whatever information the user
+         * deemed necessary to the callback. In our case we use it to pass the
+         * array that the tokens get appended into.
+         */
+        void *data;
+    } lex_callback;
+
+    /*
+     * This is the path of the file being parsed. We use the filepath when
+     * constructing SourceFileNodes.
+     */
+    pm_string_t filepath;
+
+    /*
+     * This constant pool keeps all of the constants defined throughout the file
+     * so that we can reference them later.
+     */
+    pm_constant_pool_t constant_pool;
+
+    /* This is the list of line offsets in the source file. */
+    pm_line_offset_list_t line_offsets;
+
+    /*
+     * State communicated from the lexer to the parser for integer tokens.
+     */
+    struct {
+        /*
+         * A flag indicating the base of the integer (binary, octal, decimal,
+         * hexadecimal). Set during lexing and read during node creation.
+         */
+        pm_node_flags_t base;
+
+        /*
+         * When lexing a decimal integer that fits in a uint32_t, we compute
+         * the value during lexing to avoid re-scanning the digits during
+         * parsing. If lexed is true, this holds the result and
+         * pm_integer_parse can be skipped.
+         */
+        uint32_t value;
+
+        /* Whether value holds a valid pre-computed integer. */
+        bool lexed;
+    } integer;
+
+    /*
+     * This string is used to pass information from the lexer to the parser. It
+     * is particularly necessary because of escape sequences.
+     */
+    pm_string_t current_string;
+
+    /*
+     * The line number at the start of the parse. This will be used to offset
+     * the line numbers of all of the locations.
+     */
+    int32_t start_line;
+
+    /*
+     * When a string-like expression is being lexed, any byte or escape sequence
+     * that resolves to a value whose top bit is set (i.e., >= 0x80) will
+     * explicitly set the encoding to the same encoding as the source.
+     * Alternatively, if a unicode escape sequence is used (e.g., \\u{80}) that
+     * resolves to a value whose top bit is set, then the encoding will be
+     * explicitly set to UTF-8.
+     *
+     * The _next_ time this happens, if the encoding that is about to become the
+     * explicitly set encoding does not match the previously set explicit
+     * encoding, a mixed encoding error will be emitted.
+     *
+     * When the expression is finished being lexed, the explicit encoding
+     * controls the encoding of the expression. For the most part this means
+     * that the expression will either be encoded in the source encoding or
+     * UTF-8. This holds for all encodings except US-ASCII. If the source is
+     * US-ASCII and an explicit encoding was set that was _not_ UTF-8, then the
+     * expression will be encoded as ASCII-8BIT.
+     *
+     * Note that if the expression is a list, different elements within the same
+     * list can have different encodings, so this will get reset between each
+     * element. Furthermore all of this only applies to lists that support
+     * interpolation, because otherwise escapes that could change the encoding
+     * are ignored.
+     *
+     * At first glance, it may make more sense for this to live on the lexer
+     * mode, but we need it here to communicate back to the parser for character
+     * literals that do not push a new lexer mode.
+     */
+    const pm_encoding_t *explicit_encoding;
+
+    /*
+     * When parsing block exits (e.g., break, next, redo), we need to validate
+     * that they are in correct contexts. For the most part we can do this by
+     * looking at our parent contexts. However, modifier while and until
+     * expressions can change that context to make block exits valid. In these
+     * cases, we need to keep track of the block exits and then validate them
+     * after the expression has been parsed.
+     *
+     * We use a pointer here because we don't want to keep a whole list attached
+     * since this will only be used in the context of begin/end expressions.
+     */
+    pm_node_list_t *current_block_exits;
+
+    /* The version of prism that we should use to parse. */
+    pm_options_version_t version;
+
+    /* The command line flags given from the options. */
+    uint8_t command_line;
+
+    /*
+     * Whether or not we have found a frozen_string_literal magic comment with
+     * a true or false value.
+     * May be:
+     *  - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
+     *  - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
+     *  - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
+     */
+    int8_t frozen_string_literal;
+
+    /*
+     * Whether or not we are parsing an eval string. This impacts whether or not
+     * we should evaluate if block exits/yields are valid.
+     */
+    bool parsing_eval;
+
+    /*
+     * Whether or not we are parsing a "partial" script, which is a script that
+     * will be evaluated in the context of another script, so we should not
+     * check jumps (next/break/etc.) for validity.
+     */
+    bool partial_script;
+
+    /* Whether or not we're at the beginning of a command. */
+    bool command_start;
+
+    /*
+     * Whether or not we're currently parsing the body of an endless method
+     * definition. In this context, PM_TOKEN_KEYWORD_DO_BLOCK should not be
+     * consumed by commands (it should bubble up to the outer context).
+     */
+    bool in_endless_def_body;
+
+    /* Whether or not we're currently recovering from a syntax error. */
+    bool recovering;
+
+    /*
+     * Whether or not the source being parsed could become valid if more input
+     * were appended. This is set to false when the parser encounters a token
+     * that is definitively wrong (e.g., a stray `end` or `]`) as opposed to
+     * merely incomplete.
+     */
+    bool continuable;
+
+    /*
+     * This is very specialized behavior for when you want to parse in a context
+     * that does not respect encoding comments. Its main use case is translating
+     * into the whitequark/parser AST which re-encodes source files in UTF-8
+     * before they are parsed and ignores encoding comments.
+     */
+    bool encoding_locked;
+
+    /*
+     * Whether or not the encoding has been changed by a magic comment. We use
+     * this to provide a fast path for the lexer instead of going through the
+     * function pointer.
+     */
+    bool encoding_changed;
+
+    /*
+     * This flag indicates that we are currently parsing a pattern matching
+     * expression and impacts the calculation of newlines.
+     */
+    bool pattern_matching_newlines;
+
+    /* This flag indicates that we are currently parsing a keyword argument. */
+    bool in_keyword_arg;
+
+    /*
+     * Whether or not the parser has seen a token that has semantic meaning
+     * (i.e., a token that is not a comment or whitespace).
+     */
+    bool semantic_token_seen;
+
+    /*
+     * By default, Ruby always warns about mismatched indentation. This can be
+     * toggled with a magic comment.
+     */
+    bool warn_mismatched_indentation;
+
+#if defined(PRISM_HAS_NEON) || defined(PRISM_HAS_SSSE3) || defined(PRISM_HAS_SWAR)
+    /*
+     * Cached lookup tables for pm_strpbrk's SIMD fast path. Avoids rebuilding
+     * the nibble-based tables on every call when the charset hasn't changed
+     * (which is the common case during string/regex/list lexing).
+     */
+    struct {
+        /* The cached charset (null-terminated, max 11 chars + NUL). */
+        uint8_t charset[12];
+
+        /* Nibble-based low lookup table for SIMD matching. */
+        uint8_t low_lut[16];
+
+        /* Nibble-based high lookup table for SIMD matching. */
+        uint8_t high_lut[16];
+
+        /* Scalar fallback table (4 x 64-bit bitmasks covering all ASCII). */
+        uint64_t table[4];
+    } strpbrk_cache;
+#endif
+};
+
+/*
+ * Initialize a parser with the given arena, source buffer, size, and options.
+ */
+void pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options);
+
+/*
+ * Free the memory held by the given parser.
+ *
+ * This does not free the `pm_options_t` object that was used to initialize the
+ * parser.
+ */
+void pm_parser_cleanup(pm_parser_t *parser);
+
+#endif
diff --git a/prism/internal/regexp.h b/prism/internal/regexp.h
new file mode 100644
index 0000000000..3710c984fc
--- /dev/null
+++ b/prism/internal/regexp.h
@@ -0,0 +1,41 @@
+#ifndef PRISM_INTERNAL_REGEXP_H
+#define PRISM_INTERNAL_REGEXP_H
+
+#include "prism/ast.h"
+#include "prism/parser.h"
+
+/*
+ * Accumulation state for named capture groups found during regexp parsing.
+ * The caller initializes this with the call node and passes it to
+ * pm_regexp_parse. The regexp parser populates match and names as groups
+ * are found.
+ */
+typedef struct {
+    /* The call node wrapping the regular expression node (for =~). */
+    pm_call_node_t *call;
+
+    /* The match write node being built, or NULL if no captures found yet. */
+    pm_match_write_node_t *match;
+
+    /* The list of capture names found so far (for deduplication). */
+    pm_constant_id_list_t names;
+} pm_regexp_name_data_t;
+
+/*
+ * Callback invoked by pm_regexp_parse() for each named capture group found.
+ */
+typedef void (*pm_regexp_name_callback_t)(pm_parser_t *parser, const pm_string_t *name, bool shared, pm_regexp_name_data_t *data);
+
+/*
+ * Parse a regular expression, validate its encoding, and optionally extract
+ * named capture groups. Returns the encoding flags to set on the node.
+ */
+PRISM_EXPORTED_FUNCTION pm_node_flags_t pm_regexp_parse(pm_parser_t *parser, pm_regular_expression_node_t *node, pm_regexp_name_callback_t name_callback, pm_regexp_name_data_t *name_data);
+
+/*
+ * Parse an interpolated regular expression for named capture groups only.
+ * No encoding validation is performed.
+ */
+void pm_regexp_parse_named_captures(pm_parser_t *parser, const uint8_t *source, size_t size, bool shared, bool extended_mode, pm_regexp_name_callback_t name_callback, pm_regexp_name_data_t *name_data);
+
+#endif
diff --git a/prism/internal/serialize.h b/prism/internal/serialize.h
new file mode 100644
index 0000000000..e611a0374b
--- /dev/null
+++ b/prism/internal/serialize.h
@@ -0,0 +1,34 @@
+#ifndef PRISM_INTERNAL_SERIALIZE_H
+#define PRISM_INTERNAL_SERIALIZE_H
+
+#include "prism/internal/encoding.h"
+#include "prism/internal/list.h"
+
+#include "prism/ast.h"
+#include "prism/buffer.h"
+#include "prism/excludes.h"
+#include "prism/parser.h"
+
+/* We optionally support serializing to a binary string. For systems that do not
+ * want or need this functionality, it can be turned off with the
+ * PRISM_EXCLUDE_SERIALIZATION define. */
+#ifndef PRISM_EXCLUDE_SERIALIZATION
+
+/*
+ * Serialize the given list of comments to the given buffer.
+ */
+void pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer);
+
+/*
+ * Serialize the name of the encoding to the buffer.
+ */
+void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer);
+
+/*
+ * Serialize the encoding, metadata, nodes, and constant pool.
+ */
+void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
+
+#endif
+
+#endif
diff --git a/prism/internal/source.h b/prism/internal/source.h
new file mode 100644
index 0000000000..b3c2b55be3
--- /dev/null
+++ b/prism/internal/source.h
@@ -0,0 +1,72 @@
+#ifndef PRISM_INTERNAL_SOURCE_H
+#define PRISM_INTERNAL_SOURCE_H
+
+#include "prism/source.h"
+#include "prism/buffer.h"
+
+#include <stdbool.h>
+
+/*
+ * The type of source, which determines cleanup behavior.
+ */
+typedef enum {
+    /* Wraps existing constant memory, no cleanup. */
+    PM_SOURCE_CONSTANT,
+
+    /* Wraps existing shared memory (non-owning slice), no cleanup. */
+    PM_SOURCE_SHARED,
+
+    /* Owns a heap-allocated buffer, freed on cleanup. */
+    PM_SOURCE_OWNED,
+
+    /* Memory-mapped file, unmapped on cleanup. */
+    PM_SOURCE_MAPPED,
+
+    /* Stream source backed by a pm_buffer_t. */
+    PM_SOURCE_STREAM
+} pm_source_type_t;
+
+/*
+ * The internal representation of a source.
+ */
+struct pm_source_t {
+    /* A pointer to the start of the source data. */
+    const uint8_t *source;
+
+    /* The length of the source data in bytes. */
+    size_t length;
+
+    /* The type of the source. */
+    pm_source_type_t type;
+
+    /* Stream-specific data, only used for PM_SOURCE_STREAM sources. */
+    struct {
+        /* The buffer that holds the accumulated stream data. */
+        pm_buffer_t *buffer;
+
+        /* The stream object to read from. */
+        void *stream;
+
+        /* The function to use to read from the stream. */
+        pm_source_stream_fgets_t *fgets;
+
+        /* The function to use to check if the stream is at EOF. */
+        pm_source_stream_feof_t *feof;
+
+        /* Whether the stream has reached EOF. */
+        bool eof;
+    } stream;
+};
+
+/*
+ * Read from a stream into the source's internal buffer. This is used by
+ * pm_parse_stream to incrementally read the source.
+ */
+bool pm_source_stream_read(pm_source_t *source);
+
+/*
+ * Returns whether the stream source has reached EOF.
+ */
+bool pm_source_stream_eof(const pm_source_t *source);
+
+#endif
diff --git a/prism/static_literals.h b/prism/internal/static_literals.h
index bd29761899..d59002ac0a 100644
--- a/prism/static_literals.h
+++ b/prism/internal/static_literals.h
@@ -1,33 +1,25 @@
-/**
- * @file static_literals.h
- *
- * A set of static literal nodes that can be checked for duplicates.
- */
-#ifndef PRISM_STATIC_LITERALS_H
-#define PRISM_STATIC_LITERALS_H
+#ifndef PRISM_INTERNAL_STATIC_LITERALS_H
+#define PRISM_INTERNAL_STATIC_LITERALS_H
 
-#include "prism/defines.h"
 #include "prism/ast.h"
-#include "prism/util/pm_newline_list.h"
-
-#include <assert.h>
-#include <stdbool.h>
+#include "prism/buffer.h"
+#include "prism/line_offset_list.h"
 
-/**
+/*
  * An internal hash table for a set of nodes.
  */
 typedef struct {
-    /** The array of nodes in the hash table. */
+    /* The array of nodes in the hash table. */
     pm_node_t **nodes;
 
-    /** The size of the hash table. */
+    /* The size of the hash table. */
     uint32_t size;
 
-    /** The space that has been allocated in the hash table. */
+    /* The space that has been allocated in the hash table. */
     uint32_t capacity;
 } pm_node_hash_t;
 
-/**
+/*
  * Certain sets of nodes (hash keys and when clauses) check for duplicate nodes
  * to alert the user of potential issues. To do this, we keep a set of the nodes
  * that have been seen so far, and compare whenever we find a new node.
@@ -36,86 +28,71 @@ typedef struct {
  * that need to be performed.
  */
 typedef struct {
-    /**
+    /*
      * This is the set of IntegerNode and SourceLineNode instances.
      */
     pm_node_hash_t integer_nodes;
 
-    /**
+    /*
      * This is the set of FloatNode instances.
      */
     pm_node_hash_t float_nodes;
 
-    /**
+    /*
      * This is the set of RationalNode and ImaginaryNode instances.
      */
     pm_node_hash_t number_nodes;
 
-    /**
+    /*
      * This is the set of StringNode and SourceFileNode instances.
      */
     pm_node_hash_t string_nodes;
 
-    /**
+    /*
      * This is the set of RegularExpressionNode instances.
      */
     pm_node_hash_t regexp_nodes;
 
-    /**
+    /*
      * This is the set of SymbolNode instances.
      */
     pm_node_hash_t symbol_nodes;
 
-    /**
+    /*
      * A pointer to the last TrueNode instance that was inserted, or NULL.
      */
     pm_node_t *true_node;
 
-    /**
+    /*
      * A pointer to the last FalseNode instance that was inserted, or NULL.
      */
     pm_node_t *false_node;
 
-    /**
+    /*
      * A pointer to the last NilNode instance that was inserted, or NULL.
      */
     pm_node_t *nil_node;
 
-    /**
+    /*
      * A pointer to the last SourceEncodingNode instance that was inserted, or
      * NULL.
      */
     pm_node_t *source_encoding_node;
 } pm_static_literals_t;
 
-/**
+/*
  * Add a node to the set of static literals.
- *
- * @param newline_list The list of newline offsets to use to calculate lines.
- * @param start_line The line number that the parser starts on.
- * @param literals The set of static literals to add the node to.
- * @param node The node to add to the set.
- * @param replace Whether to replace the previous node if one already exists.
- * @return A pointer to the node that is being overwritten, if there is one.
  */
-pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace);
+pm_node_t * pm_static_literals_add(const pm_line_offset_list_t *line_offsets, const uint8_t *start, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace);
 
-/**
+/*
  * Free the internal memory associated with the given static literals set.
- *
- * @param literals The set of static literals to free.
  */
 void pm_static_literals_free(pm_static_literals_t *literals);
 
-/**
+/*
  * Create a string-based representation of the given static literal.
- *
- * @param buffer The buffer to write the string to.
- * @param newline_list The list of newline offsets to use to calculate lines.
- * @param start_line The line number that the parser starts on.
- * @param encoding_name The name of the encoding of the source being parsed.
- * @param node The node to create a string representation of.
  */
-void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node);
+void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_line_offset_list_t *line_offsets, const uint8_t *start, int32_t start_line, const char *encoding_name, const pm_node_t *node);
 
 #endif
diff --git a/prism/internal/stringy.h b/prism/internal/stringy.h
new file mode 100644
index 0000000000..1aaa23ea75
--- /dev/null
+++ b/prism/internal/stringy.h
@@ -0,0 +1,30 @@
+#ifndef PRISM_INTERNAL_STRINGY_H
+#define PRISM_INTERNAL_STRINGY_H
+
+#include "prism/stringy.h"
+
+/*
+ * Defines an empty string. This is useful for initializing a string that will
+ * be filled in later.
+ */
+#define PM_STRING_EMPTY ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 })
+
+/*
+ * Initialize a shared string that is based on initial input.
+ */
+void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end);
+
+/*
+ * Compare the underlying lengths and bytes of two strings. Returns 0 if the
+ * strings are equal, a negative number if the left string is less than the
+ * right string, and a positive number if the left string is greater than the
+ * right string.
+ */
+int pm_string_compare(const pm_string_t *left, const pm_string_t *right);
+
+/*
+ * Free the associated memory of the given string.
+ */
+void pm_string_cleanup(pm_string_t *string);
+
+#endif
diff --git a/prism/util/pm_strncasecmp.h b/prism/internal/strncasecmp.h
index 5cb88cb5eb..775f6a993e 100644
--- a/prism/util/pm_strncasecmp.h
+++ b/prism/internal/strncasecmp.h
@@ -1,18 +1,10 @@
-/**
- * @file pm_strncasecmp.h
- *
- * A custom strncasecmp implementation.
- */
-#ifndef PRISM_STRNCASECMP_H
-#define PRISM_STRNCASECMP_H
+#ifndef PRISM_INTERNAL_STRNCASECMP_H
+#define PRISM_INTERNAL_STRNCASECMP_H
 
-#include "prism/defines.h"
-
-#include <ctype.h>
 #include <stddef.h>
 #include <stdint.h>
 
-/**
+/*
  * Compare two strings, ignoring case, up to the given length. Returns 0 if the
  * strings are equal, a negative number if string1 is less than string2, or a
  * positive number if string1 is greater than string2.
@@ -20,12 +12,6 @@
  * Note that this is effectively our own implementation of strncasecmp, but it's
  * not available on all of the platforms we want to support so we're rolling it
  * here.
- *
- * @param string1 The first string to compare.
- * @param string2 The second string to compare
- * @param length The maximum number of characters to compare.
- * @return 0 if the strings are equal, a negative number if string1 is less than
- *     string2, or a positive number if string1 is greater than string2.
  */
 int pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length);
 
diff --git a/prism/util/pm_strpbrk.h b/prism/internal/strpbrk.h
index f387bd5782..d64156c002 100644
--- a/prism/util/pm_strpbrk.h
+++ b/prism/internal/strpbrk.h
@@ -1,19 +1,15 @@
-/**
- * @file pm_strpbrk.h
- *
- * A custom strpbrk implementation.
- */
-#ifndef PRISM_STRPBRK_H
-#define PRISM_STRPBRK_H
+#ifndef PRISM_INTERNAL_STRPBRK_H
+#define PRISM_INTERNAL_STRPBRK_H
 
-#include "prism/defines.h"
-#include "prism/diagnostic.h"
 #include "prism/parser.h"
 
+/* The maximum number of bytes in a strpbrk charset. */
+#define PM_STRPBRK_CACHE_SIZE 16
+
 #include <stddef.h>
-#include <string.h>
+#include <stdint.h>
 
-/**
+/*
  * Here we have rolled our own version of strpbrk. The standard library strpbrk
  * has undefined behavior when the source string is not null-terminated. We want
  * to support strings that are not null-terminated because pm_parse does not
@@ -31,15 +27,6 @@
  * characters that are trailing bytes of multi-byte characters. For example, in
  * Shift-JIS, the backslash character can be a trailing byte. In that case we
  * need to take a slower path and iterate one multi-byte character at a time.
- *
- * @param parser The parser.
- * @param source The source to search.
- * @param charset The charset to search for.
- * @param length The maximum number of bytes to search.
- * @param validate Whether to validate that the source string is valid in the
- *     current encoding of the parser.
- * @return A pointer to the first character in the source string that is in the
- *     charset, or NULL if no such character exists.
  */
 const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate);
 
diff --git a/prism/internal/tokens.h b/prism/internal/tokens.h
new file mode 100644
index 0000000000..3a983e54ae
--- /dev/null
+++ b/prism/internal/tokens.h
@@ -0,0 +1,11 @@
+#ifndef PRISM_INTERNAL_TOKENS_H
+#define PRISM_INTERNAL_TOKENS_H
+
+#include "prism/ast.h"
+
+/*
+ * Returns the human name of the given token type.
+ */
+const char * pm_token_str(pm_token_type_t token_type);
+
+#endif
diff --git a/prism/json.h b/prism/json.h
new file mode 100644
index 0000000000..11039e7796
--- /dev/null
+++ b/prism/json.h
@@ -0,0 +1,32 @@
+/**
+ * @file json.h
+ */
+#ifndef PRISM_JSON_H
+#define PRISM_JSON_H
+
+#include "prism/excludes.h"
+
+/* We optionally support dumping to JSON. For systems that don't want or need
+ * this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define.
+ */
+#ifndef PRISM_EXCLUDE_JSON
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nonnull.h"
+
+#include "prism/ast.h"
+#include "prism/buffer.h"
+#include "prism/parser.h"
+
+/**
+ * Dump JSON to the given buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param parser The parser that parsed the node.
+ * @param node The node to serialize.
+ */
+PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) PRISM_NONNULL(1, 2, 3);
+
+#endif
+
+#endif
diff --git a/prism/line_offset_list.c b/prism/line_offset_list.c
new file mode 100644
index 0000000000..ce217ebd3f
--- /dev/null
+++ b/prism/line_offset_list.c
@@ -0,0 +1,100 @@
+#include "prism/compiler/align.h"
+#include "prism/internal/line_offset_list.h"
+#include "prism/internal/arena.h"
+
+#include <assert.h>
+#include <string.h>
+
+/**
+ * Initialize a new line offset list with the given capacity (at least 1).
+ */
+void
+pm_line_offset_list_init(pm_arena_t *arena, pm_line_offset_list_t *list, size_t capacity) {
+    if (capacity == 0) capacity = 1; // slot 0 is always written below
+    list->offsets = (uint32_t *) pm_arena_alloc(arena, capacity * sizeof(uint32_t), PRISM_ALIGNOF(uint32_t));
+    // The first line always has offset 0.
+    list->offsets[0] = 0;
+    list->size = 1;
+    list->capacity = capacity;
+}
+
+/**
+ * Reset the list so that only its first entry (index 0) is considered in use.
+ */
+void
+pm_line_offset_list_clear(pm_line_offset_list_t *list) {
+    list->size = 1;
+}
+
+/**
+ * Append a new offset to the line offset list (slow path: grow, then store).
+ */
+void
+pm_line_offset_list_append_slow(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor) {
+    // (capacity * 3) / 2 does not grow a capacity of 0 or 1, so floor it at 2.
+    size_t new_capacity = list->capacity < 2 ? 2 : (list->capacity * 3) / 2;
+    uint32_t *new_offsets = (uint32_t *) pm_arena_alloc(arena, new_capacity * sizeof(uint32_t), PRISM_ALIGNOF(uint32_t));
+
+    memcpy(new_offsets, list->offsets, list->size * sizeof(uint32_t));
+    list->offsets = new_offsets;
+    list->capacity = new_capacity;
+
+    assert(list->size == 0 || cursor > list->offsets[list->size - 1]);
+    list->offsets[list->size++] = cursor;
+}
+
+/**
+ * Returns the line of the given offset. If the offset is not in the list, the
+ * line of the closest offset less than the given offset is returned.
+ */
+int32_t
+pm_line_offset_list_line(const pm_line_offset_list_t *list, uint32_t cursor, int32_t start_line) {
+    // Count the offsets <= cursor by bisecting the half-open range
+    // [left, right). The exclusive upper bound is only ever set to mid, so it
+    // cannot wrap below zero the way an inclusive `mid - 1` bound could.
+    // Offsets ascend strictly, so the containing line's index is left - 1.
+    size_t left = 0;
+    size_t right = list->size;
+
+    while (left < right) {
+        size_t mid = left + (right - left) / 2;
+
+        if (list->offsets[mid] <= cursor) {
+            left = mid + 1;
+        } else {
+            right = mid;
+        }
+    }
+
+    return ((int32_t) left) + start_line - 1;
+}
+
+/**
+ * Returns the line and column of the given offset. If the offset is not in the
+ * list, the line and column of the closest offset less than the given offset
+ * are returned.
+ */
+pm_line_column_t
+pm_line_offset_list_line_column(const pm_line_offset_list_t *list, uint32_t cursor, int32_t start_line) {
+    // Count the offsets <= cursor by bisecting the half-open range
+    // [left, right); the exclusive upper bound never wraps below zero. An
+    // exact match at index i ends with left == i + 1, giving column 0. If no
+    // offset is <= cursor (malformed list), the column falls back to cursor.
+    size_t left = 0;
+    size_t right = list->size;
+
+    while (left < right) {
+        size_t mid = left + (right - left) / 2;
+
+        if (list->offsets[mid] <= cursor) {
+            left = mid + 1;
+        } else {
+            right = mid;
+        }
+    }
+
+    return ((pm_line_column_t) {
+        .line = ((int32_t) left) + start_line - 1,
+        .column = left == 0 ? cursor : cursor - list->offsets[left - 1]
+    });
+}
diff --git a/prism/line_offset_list.h b/prism/line_offset_list.h
new file mode 100644
index 0000000000..848bc49139
--- /dev/null
+++ b/prism/line_offset_list.h
@@ -0,0 +1,61 @@
+/**
+ * @file line_offset_list.h
+ *
+ * A list of byte offsets of the start of each line in a string.
+ *
+ * When compiling the syntax tree, it's necessary to know the line and column
+ * of many nodes. This is necessary to support things like error messages,
+ * tracepoints, etc.
+ *
+ * It's possible that we could store the start line, start column, end line, and
+ * end column on every node in addition to the offsets that we already store,
+ * but that would be quite a lot of memory overhead.
+ */
+#ifndef PRISM_LINE_OFFSET_LIST_H
+#define PRISM_LINE_OFFSET_LIST_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nonnull.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * A list of offsets of the start of lines in a string. The offsets are assumed
+ * to be sorted/inserted in ascending order.
+ */
+typedef struct {
+    /** The number of offsets in the list. */
+    size_t size;
+
+    /** The capacity of the list that has been allocated. */
+    size_t capacity;
+
+    /** The list of offsets. */
+    uint32_t *offsets;
+} pm_line_offset_list_t;
+
+/**
+ * A line and column in a string.
+ */
+typedef struct {
+    /** The line number. */
+    int32_t line;
+
+    /** The column in bytes. */
+    uint32_t column;
+} pm_line_column_t;
+
+/**
+ * Returns the line and column of the given offset. If the offset is not in the
+ * list, the line and column of the closest offset less than the given offset
+ * are returned.
+ *
+ * @param list The list to search.
+ * @param cursor The offset to search for.
+ * @param start_line The line to start counting from.
+ * @returns The line and column of the given offset.
+ */
+PRISM_EXPORTED_FUNCTION pm_line_column_t pm_line_offset_list_line_column(const pm_line_offset_list_t *list, uint32_t cursor, int32_t start_line) PRISM_NONNULL(1);
+
+#endif
diff --git a/prism/list.c b/prism/list.c
new file mode 100644
index 0000000000..8d4cd1be94
--- /dev/null
+++ b/prism/list.c
@@ -0,0 +1,24 @@
+#include "prism/internal/list.h"
+
+/**
+ * Returns the element count stored in the list's size field.
+ */
+size_t
+pm_list_size(pm_list_t *list) {
+    return list->size;
+}
+
+/**
+ * Link the given node in at the tail of the list and bump the element count.
+ */
+void
+pm_list_append(pm_list_t *list, pm_list_node_t *node) {
+    if (list->head != NULL) {
+        list->tail->next = node;
+    } else {
+        list->head = node;
+    }
+
+    list->tail = node;
+    list->size++;
+}
diff --git a/prism/magic_comments.h b/prism/magic_comments.h
new file mode 100644
index 0000000000..c9d6b600e8
--- /dev/null
+++ b/prism/magic_comments.h
@@ -0,0 +1,35 @@
+/**
+ * @file magic_comments.h
+ *
+ * Types and functions related to magic comments found during parsing.
+ */
+#ifndef PRISM_MAGIC_COMMENTS_H
+#define PRISM_MAGIC_COMMENTS_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nonnull.h"
+
+#include "prism/ast.h"
+
+#include <stddef.h>
+
+/** An opaque pointer to a magic comment found while parsing. */
+typedef struct pm_magic_comment_t pm_magic_comment_t;
+
+/**
+ * Returns the location of the key associated with the given magic comment.
+ *
+ * @param magic_comment the magic comment whose key location we want to get
+ * @returns the location of the key associated with the given magic comment
+ */
+PRISM_EXPORTED_FUNCTION pm_location_t pm_magic_comment_key(const pm_magic_comment_t *magic_comment) PRISM_NONNULL(1);
+
+/**
+ * Returns the location of the value associated with the given magic comment.
+ *
+ * @param magic_comment the magic comment whose value location we want to get
+ * @returns the location of the value associated with the given magic comment
+ */
+PRISM_EXPORTED_FUNCTION pm_location_t pm_magic_comment_value(const pm_magic_comment_t *magic_comment) PRISM_NONNULL(1);
+
+#endif
diff --git a/prism/util/pm_memchr.c b/prism/memchr.c
index 7ea20ace6d..900e6245b7 100644
--- a/prism/util/pm_memchr.c
+++ b/prism/memchr.c
@@ -1,15 +1,19 @@
-#include "prism/util/pm_memchr.h"
+#include "prism/internal/memchr.h"
 
-#define PRISM_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+
+#define TRAILING_BYTE_MINIMUM 0x40
 
 /**
  * We need to roll our own memchr to handle cases where the encoding changes and
  * we need to search for a character in a buffer that could be the trailing byte
  * of a multibyte character.
  */
-void *
+const void *
 pm_memchr(const void *memory, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding) {
-    if (encoding_changed && encoding->multibyte && character >= PRISM_MEMCHR_TRAILING_BYTE_MINIMUM) {
+    if (encoding_changed && encoding->multibyte && character >= TRAILING_BYTE_MINIMUM) {
         const uint8_t *source = (const uint8_t *) memory;
         size_t index = 0;
 
@@ -31,5 +35,3 @@ pm_memchr(const void *memory, int character, size_t number, bool encoding_change
         return memchr(memory, character, number);
     }
 }
-
-#undef PRISM_MEMCHR_TRAILING_BYTE_MINIMUM
diff --git a/prism/node.h b/prism/node.h
index e8686a327c..75bc3c9b2d 100644
--- a/prism/node.h
+++ b/prism/node.h
@@ -6,9 +6,10 @@
 #ifndef PRISM_NODE_H
 #define PRISM_NODE_H
 
-#include "prism/defines.h"
-#include "prism/parser.h"
-#include "prism/util/pm_buffer.h"
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nonnull.h"
+
+#include "prism/ast.h"
 
 /**
  * Loop through each node in the node list, writing each node to the given
@@ -18,51 +19,12 @@
     for (size_t index = 0; index < (list)->size && ((node) = (list)->nodes[index]); index++)
 
 /**
- * Append a new node onto the end of the node list.
- *
- * @param list The list to append to.
- * @param node The node to append.
- */
-void pm_node_list_append(pm_node_list_t *list, pm_node_t *node);
-
-/**
- * Prepend a new node onto the beginning of the node list.
- *
- * @param list The list to prepend to.
- * @param node The node to prepend.
- */
-void pm_node_list_prepend(pm_node_list_t *list, pm_node_t *node);
-
-/**
- * Concatenate the given node list onto the end of the other node list.
- *
- * @param list The list to concatenate onto.
- * @param other The list to concatenate.
- */
-void pm_node_list_concat(pm_node_list_t *list, pm_node_list_t *other);
-
-/**
- * Free the internal memory associated with the given node list.
- *
- * @param list The list to free.
- */
-void pm_node_list_free(pm_node_list_t *list);
-
-/**
- * Deallocate a node and all of its children.
- *
- * @param parser The parser that owns the node.
- * @param node The node to deallocate.
- */
-PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node);
-
-/**
  * Returns a string representation of the given node type.
  *
  * @param node_type The node type to convert to a string.
- * @return A string representation of the given node type.
+ * @returns A string representation of the given node type.
  */
-PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_type);
+PRISM_EXPORTED_FUNCTION const char * pm_node_type(pm_node_type_t node_type);
 
 /**
  * Visit each of the nodes in this subtree using the given visitor callback. The
@@ -80,7 +42,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ
  * bool visit(const pm_node_t *node, void *data) {
  *     size_t *indent = (size_t *) data;
  *     for (size_t i = 0; i < *indent * 2; i++) putc(' ', stdout);
- *     printf("%s\n", pm_node_type_to_str(node->type));
+ *     printf("%s\n", pm_node_type(node->type));
  *
  *     size_t next_indent = *indent + 1;
  *     size_t *next_data = &next_indent;
@@ -93,18 +55,21 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ
  *     const char *source = "1 + 2; 3 + 4";
  *     size_t size = strlen(source);
  *
- *     pm_parser_t parser;
- *     pm_options_t options = { 0 };
- *     pm_parser_init(&parser, (const uint8_t *) source, size, &options);
+ *     pm_arena_t *arena = pm_arena_new();
+ *     pm_options_t *options = pm_options_new();
+ *
+ *     pm_parser_t *parser = pm_parser_new(arena, (const uint8_t *) source, size, options);
  *
  *     size_t indent = 0;
- *     pm_node_t *node = pm_parse(&parser);
+ *     pm_node_t *node = pm_parse(parser);
  *
  *     size_t *data = &indent;
  *     pm_visit_node(node, visit, data);
  *
- *     pm_node_destroy(&parser, node);
- *     pm_parser_free(&parser);
+ *     pm_parser_free(parser);
+ *     pm_options_free(options);
+ *     pm_arena_free(arena);
+ *
  *     return EXIT_SUCCESS;
  * }
  * ```
@@ -113,7 +78,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ
  * @param visitor The callback to call for each node in the subtree.
  * @param data An opaque pointer that is passed to the visitor callback.
  */
-PRISM_EXPORTED_FUNCTION void pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data);
+PRISM_EXPORTED_FUNCTION void pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) PRISM_NONNULL(1);
 
 /**
  * Visit the children of the given node with the given callback. This is the
@@ -124,6 +89,6 @@ PRISM_EXPORTED_FUNCTION void pm_visit_node(const pm_node_t *node, bool (*visitor
  * @param visitor The callback to call for each child node.
  * @param data An opaque pointer that is passed to the visitor callback.
  */
-PRISM_EXPORTED_FUNCTION void pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data);
+PRISM_EXPORTED_FUNCTION void pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) PRISM_NONNULL(1);
 
 #endif
diff --git a/prism/options.c b/prism/options.c
index a457178ce8..b589865a2a 100644
--- a/prism/options.c
+++ b/prism/options.c
@@ -1,18 +1,78 @@
-#include "prism/options.h"
+#include "prism/internal/options.h"
+
+#include "prism/compiler/inline.h"
+
+#include "prism/internal/allocator.h"
+#include "prism/internal/char.h"
+#include "prism/internal/stringy.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * Allocate a new, zero-initialized options struct. If the allocation fails,
+ * this function aborts the process.
+ */
+pm_options_t *
+pm_options_new(void) {
+    pm_options_t *options = (pm_options_t *) xcalloc(1, sizeof(*options));
+    if (!options) abort();
+    return options;
+}
+
+/**
+ * Free the internal memory associated with the options.
+ */
+void
+pm_options_cleanup(pm_options_t *options) {
+    pm_string_cleanup(&options->filepath);
+    pm_string_cleanup(&options->encoding);
+
+    // scopes may be NULL with a nonzero count if pm_options_scopes_init failed.
+    size_t scopes_count = options->scopes == NULL ? 0 : options->scopes_count;
+    for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
+        pm_options_scope_t *scope = &options->scopes[scope_index];
+        for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
+            pm_string_cleanup(&scope->locals[local_index]);
+        }
+
+        xfree_sized(scope->locals, scope->locals_count * sizeof(pm_string_t));
+    }
+
+    xfree_sized(options->scopes, options->scopes_count * sizeof(pm_options_scope_t));
+}
+
+/**
+ * Release what the options struct holds (via pm_options_cleanup), then free
+ * the struct itself. The pointer must not be used after this call.
+ * @param options The options struct to free.
+ */
+void
+pm_options_free(pm_options_t *options) {
+    pm_options_cleanup(options);
+    xfree_sized(options, sizeof(pm_options_t));
+}
 
 /**
  * Set the shebang callback option on the given options struct.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data) {
     options->shebang_callback = shebang_callback;
     options->shebang_callback_data = shebang_callback_data;
 }
 
 /**
+ * Get the filepath option on the given options struct.
+ */
+const pm_string_t *
+pm_options_filepath(const pm_options_t *options) {
+    return &options->filepath;
+}
+
+/**
  * Set the filepath option on the given options struct.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_options_filepath_set(pm_options_t *options, const char *filepath) {
     pm_string_constant_init(&options->filepath, filepath, strlen(filepath));
 }
@@ -20,7 +80,7 @@ pm_options_filepath_set(pm_options_t *options, const char *filepath) {
 /**
  * Set the encoding option on the given options struct.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_options_encoding_set(pm_options_t *options, const char *encoding) {
     pm_string_constant_init(&options->encoding, encoding, strlen(encoding));
 }
@@ -28,7 +88,7 @@ pm_options_encoding_set(pm_options_t *options, const char *encoding) {
 /**
  * Set the encoding_locked option on the given options struct.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked) {
     options->encoding_locked = encoding_locked;
 }
@@ -36,7 +96,7 @@ pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked) {
 /**
  * Set the line option on the given options struct.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_options_line_set(pm_options_t *options, int32_t line) {
     options->line = line;
 }
@@ -44,7 +104,7 @@ pm_options_line_set(pm_options_t *options, int32_t line) {
 /**
  * Set the frozen string literal option on the given options struct.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal) {
     options->frozen_string_literal = frozen_string_literal ? PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED : PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED;
 }
@@ -52,7 +112,7 @@ pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_l
 /**
  * Sets the command line option on the given options struct.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_options_command_line_set(pm_options_t *options, uint8_t command_line) {
     options->command_line = command_line;
 }
@@ -60,7 +120,7 @@ pm_options_command_line_set(pm_options_t *options, uint8_t command_line) {
 /**
  * Checks if the given slice represents a number.
  */
-static inline bool
+static PRISM_INLINE bool
 is_number(const char *string, size_t length) {
     return pm_strspn_decimal_digit((const uint8_t *) string, (ptrdiff_t) length) == length;
 }
@@ -70,7 +130,7 @@ is_number(const char *string, size_t length) {
  * string. If the string contains an invalid option, this returns false.
  * Otherwise, it returns true.
  */
-PRISM_EXPORTED_FUNCTION bool
+bool
 pm_options_version_set(pm_options_t *options, const char *version, size_t length) {
     if (version == NULL) {
         options->version = PM_OPTIONS_VERSION_LATEST;
@@ -88,33 +148,43 @@ pm_options_version_set(pm_options_t *options, const char *version, size_t length
             return true;
         }
 
-        if (strncmp(version, "3.5", 3) == 0) {
-            options->version = PM_OPTIONS_VERSION_LATEST;
+        if (strncmp(version, "3.5", 3) == 0 || strncmp(version, "4.0", 3) == 0) {
+            options->version = PM_OPTIONS_VERSION_CRUBY_4_0;
+            return true;
+        }
+
+        if (strncmp(version, "4.1", 3) == 0) {
+            options->version = PM_OPTIONS_VERSION_CRUBY_4_1;
             return true;
         }
 
         return false;
     }
 
-    if (length >= 4) {
-        if (strncmp(version, "3.3.", 4) == 0 && is_number(version + 4, length - 4)) {
+    if (length >= 4 && is_number(version + 4, length - 4)) {
+        if (strncmp(version, "3.3.", 4) == 0) {
             options->version = PM_OPTIONS_VERSION_CRUBY_3_3;
             return true;
         }
 
-        if (strncmp(version, "3.4.", 4) == 0 && is_number(version + 4, length - 4)) {
+        if (strncmp(version, "3.4.", 4) == 0) {
             options->version = PM_OPTIONS_VERSION_CRUBY_3_4;
             return true;
         }
 
-        if (strncmp(version, "3.5.", 4) == 0 && is_number(version + 4, length - 4)) {
-            options->version = PM_OPTIONS_VERSION_LATEST;
+        if (strncmp(version, "3.5.", 4) == 0 || strncmp(version, "4.0.", 4) == 0) {
+            options->version = PM_OPTIONS_VERSION_CRUBY_4_0;
+            return true;
+        }
+
+        if (strncmp(version, "4.1.", 4) == 0) {
+            options->version = PM_OPTIONS_VERSION_CRUBY_4_1;
             return true;
         }
     }
 
-    if (length >= 6) {
-        if (strncmp(version, "latest", 7) == 0) { // 7 to compare the \0 as well
+    if (length == 6) {
+        if (strncmp(version, "latest", 6) == 0) {
             options->version = PM_OPTIONS_VERSION_LATEST;
             return true;
         }
@@ -124,9 +194,27 @@ pm_options_version_set(pm_options_t *options, const char *version, size_t length
 }
 
 /**
+ * Set the version option on the given options struct to the lowest version of
+ * Ruby that prism supports.
+ */
+void
+pm_options_version_set_lowest(pm_options_t *options) {
+    options->version = PM_OPTIONS_VERSION_CRUBY_3_3;
+}
+
+/**
+ * Set the version option on the given options struct to the highest version of
+ * Ruby that prism supports.
+ */
+void
+pm_options_version_set_highest(pm_options_t *options) {
+    options->version = PM_OPTIONS_VERSION_LATEST;
+}
+
+/**
  * Set the main script option on the given options struct.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_options_main_script_set(pm_options_t *options, bool main_script) {
     options->main_script = main_script;
 }
@@ -134,15 +222,23 @@ pm_options_main_script_set(pm_options_t *options, bool main_script) {
 /**
  * Set the partial script option on the given options struct.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_options_partial_script_set(pm_options_t *options, bool partial_script) {
     options->partial_script = partial_script;
 }
 
 /**
+ * Get the freeze option on the given options struct.
+ */
+bool
+pm_options_freeze(const pm_options_t *options) {
+    return options->freeze;
+}
+
+/**
  * Set the freeze option on the given options struct.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_options_freeze_set(pm_options_t *options, bool freeze) {
     options->freeze = freeze;
 }
@@ -158,7 +254,7 @@ pm_options_freeze_set(pm_options_t *options, bool freeze) {
 /**
  * Allocate and zero out the scopes array on the given options struct.
  */
-PRISM_EXPORTED_FUNCTION bool
+bool
 pm_options_scopes_init(pm_options_t *options, size_t scopes_count) {
     options->scopes_count = scopes_count;
     options->scopes = xcalloc(scopes_count, sizeof(pm_options_scope_t));
@@ -166,10 +262,20 @@ pm_options_scopes_init(pm_options_t *options, size_t scopes_count) {
 }
 
 /**
- * Return a pointer to the scope at the given index within the given options.
+ * Return a constant pointer to the scope at the given index within the given
+ * options.
  */
-PRISM_EXPORTED_FUNCTION const pm_options_scope_t *
-pm_options_scope_get(const pm_options_t *options, size_t index) {
+const pm_options_scope_t *
+pm_options_scope(const pm_options_t *options, size_t index) {
+    return &options->scopes[index];
+}
+
+/**
+ * Return a mutable pointer to the scope at the given index within the given
+ * options.
+ */
+pm_options_scope_t *
+pm_options_scope_mut(pm_options_t *options, size_t index) {
     return &options->scopes[index];
 }
 
@@ -177,49 +283,38 @@ pm_options_scope_get(const pm_options_t *options, size_t index) {
  * Create a new options scope struct. This will hold a set of locals that are in
  * scope surrounding the code that is being parsed.
  */
-PRISM_EXPORTED_FUNCTION bool
+void
 pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count) {
     scope->locals_count = locals_count;
     scope->locals = xcalloc(locals_count, sizeof(pm_string_t));
     scope->forwarding = PM_OPTIONS_SCOPE_FORWARDING_NONE;
-    return scope->locals != NULL;
+    if (scope->locals == NULL && locals_count > 0) abort(); // calloc(0, n) may return NULL
 }
 
 /**
- * Return a pointer to the local at the given index within the given scope.
+ * Return a constant pointer to the local at the given index within the given
+ * scope.
  */
-PRISM_EXPORTED_FUNCTION const pm_string_t *
-pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index) {
+const pm_string_t *
+pm_options_scope_local(const pm_options_scope_t *scope, size_t index) {
     return &scope->locals[index];
 }
 
 /**
- * Set the forwarding option on the given scope struct.
+ * Return a mutable pointer to the local at the given index within the given
+ * scope.
  */
-PRISM_EXPORTED_FUNCTION void
-pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding) {
-    scope->forwarding = forwarding;
+pm_string_t *
+pm_options_scope_local_mut(pm_options_scope_t *scope, size_t index) {
+    return &scope->locals[index];
 }
 
 /**
- * Free the internal memory associated with the options.
+ * Set the forwarding option on the given scope struct.
  */
-PRISM_EXPORTED_FUNCTION void
-pm_options_free(pm_options_t *options) {
-    pm_string_free(&options->filepath);
-    pm_string_free(&options->encoding);
-
-    for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
-        pm_options_scope_t *scope = &options->scopes[scope_index];
-
-        for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
-            pm_string_free(&scope->locals[local_index]);
-        }
-
-        xfree(scope->locals);
-    }
-
-    xfree(options->scopes);
+void
+pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding) {
+    scope->forwarding = forwarding;
 }
 
 /**
@@ -304,10 +399,7 @@ pm_options_read(pm_options_t *options, const char *data) {
             data += 4;
 
             pm_options_scope_t *scope = &options->scopes[scope_index];
-            if (!pm_options_scope_init(scope, locals_count)) {
-                pm_options_free(options);
-                return;
-            }
+            pm_options_scope_init(scope, locals_count);
 
             uint8_t forwarding = (uint8_t) *data++;
             pm_options_scope_forwarding_set(&options->scopes[scope_index], forwarding);
diff --git a/prism/options.h b/prism/options.h
index 2f64701b0c..0f5d7529b1 100644
--- a/prism/options.h
+++ b/prism/options.h
@@ -6,16 +6,27 @@
 #ifndef PRISM_OPTIONS_H
 #define PRISM_OPTIONS_H
 
-#include "prism/defines.h"
-#include "prism/util/pm_char.h"
-#include "prism/util/pm_string.h"
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nodiscard.h"
+#include "prism/compiler/nonnull.h"
+
+#include "prism/stringy.h"
 
 #include <stdbool.h>
 #include <stddef.h>
-#include <stdint.h>
 
 /**
- * String literals should be made frozen.
+ * A scope of locals surrounding the code that is being parsed.
+ */
+typedef struct pm_options_scope_t pm_options_scope_t;
+
+/**
+ * The options that can be passed to the parser.
+ */
+typedef struct pm_options_t pm_options_t;
+
+/**
+ * String literals should not be frozen.
  */
 #define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED   ((int8_t) -1)
 
@@ -26,42 +37,25 @@
 #define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET      ((int8_t)  0)
 
 /**
- * String literals should be made mutable.
+ * String literals should be made frozen.
  */
 #define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED    ((int8_t)  1)
 
-/**
- * A scope of locals surrounding the code that is being parsed.
- */
-typedef struct pm_options_scope {
-    /** The number of locals in the scope. */
-    size_t locals_count;
-
-    /** The names of the locals in the scope. */
-    pm_string_t *locals;
-
-    /** Flags for the set of forwarding parameters in this scope. */
-    uint8_t forwarding;
-} pm_options_scope_t;
-
 /** The default value for parameters. */
 static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_NONE = 0x0;
 
-/** When the scope is fowarding with the * parameter. */
+/** When the scope is forwarding with the * parameter. */
 static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_POSITIONALS = 0x1;
 
-/** When the scope is fowarding with the ** parameter. */
+/** When the scope is forwarding with the ** parameter. */
 static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_KEYWORDS = 0x2;
 
-/** When the scope is fowarding with the & parameter. */
+/** When the scope is forwarding with the & parameter. */
 static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_BLOCK = 0x4;
 
-/** When the scope is fowarding with the ... parameter. */
+/** When the scope is forwarding with the ... parameter. */
 static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_ALL = 0x8;
 
-// Forward declaration needed by the callback typedef.
-struct pm_options;
-
 /**
  * The callback called when additional switches are found in a shebang comment
  * that need to be processed by the runtime.
@@ -74,118 +68,7 @@ struct pm_options;
  * @param shebang_callback_data Any additional data that should be passed along
  *   to the callback.
  */
-typedef void (*pm_options_shebang_callback_t)(struct pm_options *options, const uint8_t *source, size_t length, void *shebang_callback_data);
-
-/**
- * The version of Ruby syntax that we should be parsing with. This is used to
- * allow consumers to specify which behavior they want in case they need to
- * parse in the same way as a specific version of CRuby would have.
- */
-typedef enum {
-    /** The current version of prism. */
-    PM_OPTIONS_VERSION_LATEST = 0,
-
-    /** The vendored version of prism in CRuby 3.3.x. */
-    PM_OPTIONS_VERSION_CRUBY_3_3 = 1,
-
-    /** The vendored version of prism in CRuby 3.4.x. */
-    PM_OPTIONS_VERSION_CRUBY_3_4 = 2
-} pm_options_version_t;
-
-/**
- * The options that can be passed to the parser.
- */
-typedef struct pm_options {
-    /**
-     * The callback to call when additional switches are found in a shebang
-     * comment.
-     */
-    pm_options_shebang_callback_t shebang_callback;
-
-    /**
-     * Any additional data that should be passed along to the shebang callback
-     * if one was set.
-     */
-    void *shebang_callback_data;
-
-    /** The name of the file that is currently being parsed. */
-    pm_string_t filepath;
-
-    /**
-     * The line within the file that the parse starts on. This value is
-     * 1-indexed.
-     */
-    int32_t line;
-
-    /**
-     * The name of the encoding that the source file is in. Note that this must
-     * correspond to a name that can be found with Encoding.find in Ruby.
-     */
-    pm_string_t encoding;
-
-    /**
-     * The number of scopes surrounding the code that is being parsed.
-     */
-    size_t scopes_count;
-
-    /**
-     * The scopes surrounding the code that is being parsed. For most parses
-     * this will be NULL, but for evals it will be the locals that are in scope
-     * surrounding the eval. Scopes are ordered from the outermost scope to the
-     * innermost one.
-     */
-    pm_options_scope_t *scopes;
-
-    /**
-     * The version of prism that we should be parsing with. This is used to
-     * allow consumers to specify which behavior they want in case they need to
-     * parse exactly as a specific version of CRuby.
-     */
-    pm_options_version_t version;
-
-    /** A bitset of the various options that were set on the command line. */
-    uint8_t command_line;
-
-    /**
-    * Whether or not the frozen string literal option has been set.
-    * May be:
-    *  - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
-    *  - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
-    *  - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
-    */
-    int8_t frozen_string_literal;
-
-    /**
-     * Whether or not the encoding magic comments should be respected. This is a
-     * niche use-case where you want to parse a file with a specific encoding
-     * but ignore any encoding magic comments at the top of the file.
-     */
-    bool encoding_locked;
-
-    /**
-     * When the file being parsed is the main script, the shebang will be
-     * considered for command-line flags (or for implicit -x). The caller needs
-     * to pass this information to the parser so that it can behave correctly.
-     */
-    bool main_script;
-
-    /**
-     * When the file being parsed is considered a "partial" script, jumps will
-     * not be marked as errors if they are not contained within loops/blocks.
-     * This is used in the case that you're parsing a script that you know will
-     * be embedded inside another script later, but you do not have that context
-     * yet. For example, when parsing an ERB template that will be evaluated
-     * inside another script.
-     */
-    bool partial_script;
-
-    /**
-     * Whether or not the parser should freeze the nodes that it creates. This
-     * makes it possible to have a deeply frozen AST that is safe to share
-     * between concurrency primitives.
-     */
-    bool freeze;
-} pm_options_t;
+typedef void (*pm_options_shebang_callback_t)(pm_options_t *options, const uint8_t *source, size_t length, void *shebang_callback_data);
 
 /**
  * A bit representing whether or not the command line -a option was set. -a
@@ -220,11 +103,27 @@ static const uint8_t PM_OPTIONS_COMMAND_LINE_P = 0x10;
 
 /**
  * A bit representing whether or not the command line -x option was set. -x
- * searches the input file for a shebang that matches the current Ruby engine.
+ * searches the input file for a shebang that includes "ruby".
  */
 static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20;
 
 /**
+ * Allocate a new options struct. If the options struct cannot be allocated,
+ * this function aborts the process.
+ *
+ * @returns A new options struct with default values. It is the responsibility
+ *     of the caller to free this struct using pm_options_free().
+ */
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_options_t * pm_options_new(void);
+
+/**
+ * Free both the held memory of the given options struct and the struct itself.
+ *
+ * @param options The options struct to free.
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options) PRISM_NONNULL(1);
+
+/**
  * Set the shebang callback option on the given options struct.
  *
  * @param options The options struct to set the shebang callback on.
@@ -232,7 +131,15 @@ static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20;
  * @param shebang_callback_data Any additional data that should be passed along
  *   to the callback.
  */
-PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data);
+PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data) PRISM_NONNULL(1);
+
+/**
+ * Get the filepath option on the given options struct.
+ *
+ * @param options The options struct to get the filepath from.
+ * @returns The filepath.
+ */
+PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_filepath(const pm_options_t *options) PRISM_NONNULL(1);
 
 /**
  * Set the filepath option on the given options struct.
@@ -240,7 +147,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *optio
  * @param options The options struct to set the filepath on.
  * @param filepath The filepath to set.
  */
-PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, const char *filepath);
+PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, const char *filepath) PRISM_NONNULL(1);
 
 /**
  * Set the line option on the given options struct.
@@ -248,7 +155,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, cons
  * @param options The options struct to set the line on.
  * @param line The line to set.
  */
-PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line);
+PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line) PRISM_NONNULL(1);
 
 /**
  * Set the encoding option on the given options struct.
@@ -256,7 +163,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t
  * @param options The options struct to set the encoding on.
  * @param encoding The encoding to set.
  */
-PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding);
+PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding) PRISM_NONNULL(1);
 
 /**
  * Set the encoding_locked option on the given options struct.
@@ -264,7 +171,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, cons
  * @param options The options struct to set the encoding_locked value on.
  * @param encoding_locked The encoding_locked value to set.
  */
-PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked);
+PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked) PRISM_NONNULL(1);
 
 /**
  * Set the frozen string literal option on the given options struct.
@@ -272,7 +179,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *option
  * @param options The options struct to set the frozen string literal value on.
  * @param frozen_string_literal The frozen string literal value to set.
  */
-PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal);
+PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal) PRISM_NONNULL(1);
 
 /**
  * Sets the command line option on the given options struct.
@@ -280,7 +187,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t *
  * @param options The options struct to set the command line option on.
  * @param command_line The command_line value to set.
  */
-PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options, uint8_t command_line);
+PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options, uint8_t command_line) PRISM_NONNULL(1);
 
 /**
  * Set the version option on the given options struct by parsing the given
@@ -290,9 +197,25 @@ PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options,
  * @param options The options struct to set the version on.
  * @param version The version to set.
  * @param length The length of the version string.
- * @return Whether or not the version was parsed successfully.
+ * @returns Whether or not the version was parsed successfully.
  */
-PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length);
+PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length) PRISM_NONNULL(1);
+
+/**
+ * Set the version option on the given options struct to the lowest version of
+ * Ruby that prism supports.
+ *
+ * @param options The options struct to set the version on.
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_version_set_lowest(pm_options_t *options) PRISM_NONNULL(1);
+
+/**
+ * Set the version option on the given options struct to the highest version of
+ * Ruby that prism supports.
+ *
+ * @param options The options struct to set the version on.
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_version_set_highest(pm_options_t *options) PRISM_NONNULL(1);
 
 /**
  * Set the main script option on the given options struct.
@@ -300,7 +223,7 @@ PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const
  * @param options The options struct to set the main script value on.
  * @param main_script The main script value to set.
  */
-PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script);
+PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script) PRISM_NONNULL(1);
 
 /**
  * Set the partial script option on the given options struct.
@@ -308,7 +231,15 @@ PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, b
  * @param options The options struct to set the partial script value on.
  * @param partial_script The partial script value to set.
  */
-PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script);
+PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script) PRISM_NONNULL(1);
+
+/**
+ * Get the freeze option on the given options struct.
+ *
+ * @param options The options struct to get the freeze value from.
+ * @returns The freeze value.
+ */
+PRISM_EXPORTED_FUNCTION bool pm_options_freeze(const pm_options_t *options) PRISM_NONNULL(1);
 
 /**
  * Set the freeze option on the given options struct.
@@ -316,127 +247,73 @@ PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options
  * @param options The options struct to set the freeze value on.
  * @param freeze The freeze value to set.
  */
-PRISM_EXPORTED_FUNCTION void pm_options_freeze_set(pm_options_t *options, bool freeze);
+PRISM_EXPORTED_FUNCTION void pm_options_freeze_set(pm_options_t *options, bool freeze) PRISM_NONNULL(1);
 
 /**
  * Allocate and zero out the scopes array on the given options struct.
  *
  * @param options The options struct to initialize the scopes array on.
  * @param scopes_count The number of scopes to allocate.
- * @return Whether or not the scopes array was initialized successfully.
+ * @returns Whether or not the scopes array was initialized successfully.
+ */
+PRISM_EXPORTED_FUNCTION bool pm_options_scopes_init(pm_options_t *options, size_t scopes_count) PRISM_NONNULL(1);
+
+/**
+ * Return a constant pointer to the scope at the given index within the given
+ * options.
+ *
+ * @param options The options struct to get the scope from.
+ * @param index The index of the scope to get.
+ * @returns A constant pointer to the scope at the given index.
  */
-PRISM_EXPORTED_FUNCTION bool pm_options_scopes_init(pm_options_t *options, size_t scopes_count);
+PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope(const pm_options_t *options, size_t index) PRISM_NONNULL(1);
 
 /**
- * Return a pointer to the scope at the given index within the given options.
+ * Return a mutable pointer to the scope at the given index within the given
+ * options.
  *
  * @param options The options struct to get the scope from.
  * @param index The index of the scope to get.
- * @return A pointer to the scope at the given index.
+ * @returns A mutable pointer to the scope at the given index.
  */
-PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index);
+PRISM_EXPORTED_FUNCTION pm_options_scope_t * pm_options_scope_mut(pm_options_t *options, size_t index) PRISM_NONNULL(1);
 
 /**
  * Create a new options scope struct. This will hold a set of locals that are in
- * scope surrounding the code that is being parsed.
+ * scope surrounding the code that is being parsed. If the locals cannot be
+ * allocated, this function aborts the process.
  *
  * @param scope The scope struct to initialize.
  * @param locals_count The number of locals to allocate.
- * @return Whether or not the scope was initialized successfully.
  */
-PRISM_EXPORTED_FUNCTION bool pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count);
+PRISM_EXPORTED_FUNCTION void pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count) PRISM_NONNULL(1);
 
 /**
- * Return a pointer to the local at the given index within the given scope.
+ * Return a constant pointer to the local at the given index within the given
+ * scope.
  *
  * @param scope The scope struct to get the local from.
  * @param index The index of the local to get.
- * @return A pointer to the local at the given index.
- */
-PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index);
-
-/**
- * Set the forwarding option on the given scope struct.
- *
- * @param scope The scope struct to set the forwarding on.
- * @param forwarding The forwarding value to set.
+ * @returns A constant pointer to the local at the given index.
  */
-PRISM_EXPORTED_FUNCTION void pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding);
+PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local(const pm_options_scope_t *scope, size_t index) PRISM_NONNULL(1);
 
 /**
- * Free the internal memory associated with the options.
+ * Return a mutable pointer to the local at the given index within the given
+ * scope.
  *
- * @param options The options struct whose internal memory should be freed.
+ * @param scope The scope struct to get the local from.
+ * @param index The index of the local to get.
+ * @returns A mutable pointer to the local at the given index.
  */
-PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
+PRISM_EXPORTED_FUNCTION pm_string_t * pm_options_scope_local_mut(pm_options_scope_t *scope, size_t index) PRISM_NONNULL(1);
 
 /**
- * Deserialize an options struct from the given binary string. This is used to
- * pass options to the parser from an FFI call so that consumers of the library
- * from an FFI perspective don't have to worry about the structure of our
- * options structs. Since the source of these calls will be from Ruby
- * implementation internals we assume it is from a trusted source.
- *
- * `data` is assumed to be a valid pointer pointing to well-formed data. The
- * layout of this data should be the same every time, and is described below:
- *
- * | # bytes | field                      |
- * | ------- | -------------------------- |
- * | `4`     | the length of the filepath |
- * | ...     | the filepath bytes         |
- * | `4`     | the line number            |
- * | `4`     | the length the encoding    |
- * | ...     | the encoding bytes         |
- * | `1`     | frozen string literal      |
- * | `1`     | -p command line option     |
- * | `1`     | -n command line option     |
- * | `1`     | -l command line option     |
- * | `1`     | -a command line option     |
- * | `1`     | the version                |
- * | `1`     | encoding locked            |
- * | `1`     | main script                |
- * | `1`     | partial script             |
- * | `1`     | freeze                     |
- * | `4`     | the number of scopes       |
- * | ...     | the scopes                 |
- *
- * The version field is an enum, so it should be one of the following values:
- *
- * | value | version                   |
- * | ----- | ------------------------- |
- * | `0`   | use the latest version of prism |
- * | `1`   | use the version of prism that is vendored in CRuby 3.3.0 |
- *
- * Each scope is laid out as follows:
- *
- * | # bytes | field                      |
- * | ------- | -------------------------- |
- * | `4`     | the number of locals       |
- * | `1`     | the forwarding flags       |
- * | ...     | the locals                 |
- *
- * Each local is laid out as follows:
- *
- * | # bytes | field                      |
- * | ------- | -------------------------- |
- * | `4`     | the length of the local    |
- * | ...     | the local bytes            |
- *
- * Some additional things to note about this layout:
- *
- * * The filepath can have a length of 0, in which case we'll consider it an
- *   empty string.
- * * The line number should be 0-indexed.
- * * The encoding can have a length of 0, in which case we'll use the default
- *   encoding (UTF-8). If it's not 0, it should correspond to a name of an
- *   encoding that can be passed to `Encoding.find` in Ruby.
- * * The frozen string literal, encoding locked, main script, and partial script
- *   fields are booleans, so their values should be either 0 or 1.
- * * The number of scopes can be 0.
+ * Set the forwarding option on the given scope struct.
  *
- * @param options The options struct to deserialize into.
- * @param data The binary string to deserialize from.
+ * @param scope The scope struct to set the forwarding on.
+ * @param forwarding The forwarding value to set.
  */
-void pm_options_read(pm_options_t *options, const char *data);
+PRISM_EXPORTED_FUNCTION void pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding) PRISM_NONNULL(1);
 
 #endif
diff --git a/prism/pack.c b/prism/pack.c
deleted file mode 100644
index 1388ca8a3b..0000000000
--- a/prism/pack.c
+++ /dev/null
@@ -1,509 +0,0 @@
-#include "prism/pack.h"
-
-// We optionally support parsing String#pack templates. For systems that don't
-// want or need this functionality, it can be turned off with the
-// PRISM_EXCLUDE_PACK define.
-#ifdef PRISM_EXCLUDE_PACK
-
-void pm_pack_parse(void) {}
-
-#else
-
-#include <stdbool.h>
-#include <errno.h>
-
-static uintmax_t
-strtoumaxc(const char **format) {
-    uintmax_t value = 0;
-    while (**format >= '0' && **format <= '9') {
-        if (value > UINTMAX_MAX / 10) {
-            errno = ERANGE;
-        }
-        value = value * 10 + ((uintmax_t) (**format - '0'));
-        (*format)++;
-    }
-    return value;
-}
-
-PRISM_EXPORTED_FUNCTION pm_pack_result
-pm_pack_parse(
-    pm_pack_variant variant,
-    const char **format,
-    const char *format_end,
-    pm_pack_type *type,
-    pm_pack_signed *signed_type,
-    pm_pack_endian *endian,
-    pm_pack_size *size,
-    pm_pack_length_type *length_type,
-    uint64_t *length,
-    pm_pack_encoding *encoding
-) {
-    if (*encoding == PM_PACK_ENCODING_START) {
-        *encoding = PM_PACK_ENCODING_US_ASCII;
-    }
-
-    if (*format == format_end) {
-            *type = PM_PACK_END;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            *length_type = PM_PACK_LENGTH_NA;
-            return PM_PACK_OK;
-    }
-
-    *length_type = PM_PACK_LENGTH_FIXED;
-    *length = 1;
-    bool length_changed_allowed = true;
-
-    char directive = **format;
-    (*format)++;
-    switch (directive) {
-        case ' ':
-        case '\t':
-        case '\n':
-        case '\v':
-        case '\f':
-        case '\r':
-            *type = PM_PACK_SPACE;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            *length_type = PM_PACK_LENGTH_NA;
-            *length = 0;
-            return PM_PACK_OK;
-        case '#':
-            while ((*format < format_end) && (**format != '\n')) {
-                (*format)++;
-            }
-            *type = PM_PACK_COMMENT;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            *length_type = PM_PACK_LENGTH_NA;
-            *length = 0;
-            return PM_PACK_OK;
-        case 'C':
-            *type = PM_PACK_INTEGER;
-            *signed_type = PM_PACK_UNSIGNED;
-            *endian = PM_PACK_AGNOSTIC_ENDIAN;
-            *size = PM_PACK_SIZE_8;
-            break;
-        case 'S':
-            *type = PM_PACK_INTEGER;
-            *signed_type = PM_PACK_UNSIGNED;
-            *endian = PM_PACK_NATIVE_ENDIAN;
-            *size = PM_PACK_SIZE_16;
-            break;
-        case 'L':
-            *type = PM_PACK_INTEGER;
-            *signed_type = PM_PACK_UNSIGNED;
-            *endian = PM_PACK_NATIVE_ENDIAN;
-            *size = PM_PACK_SIZE_32;
-            break;
-        case 'Q':
-            *type = PM_PACK_INTEGER;
-            *signed_type = PM_PACK_UNSIGNED;
-            *endian = PM_PACK_NATIVE_ENDIAN;
-            *size = PM_PACK_SIZE_64;
-            break;
-        case 'J':
-            *type = PM_PACK_INTEGER;
-            *signed_type = PM_PACK_UNSIGNED;
-            *endian = PM_PACK_NATIVE_ENDIAN;
-            *size = PM_PACK_SIZE_P;
-            break;
-        case 'c':
-            *type = PM_PACK_INTEGER;
-            *signed_type = PM_PACK_SIGNED;
-            *endian = PM_PACK_AGNOSTIC_ENDIAN;
-            *size = PM_PACK_SIZE_8;
-            break;
-        case 's':
-            *type = PM_PACK_INTEGER;
-            *signed_type = PM_PACK_SIGNED;
-            *endian = PM_PACK_NATIVE_ENDIAN;
-            *size = PM_PACK_SIZE_16;
-            break;
-        case 'l':
-            *type = PM_PACK_INTEGER;
-            *signed_type = PM_PACK_SIGNED;
-            *endian = PM_PACK_NATIVE_ENDIAN;
-            *size = PM_PACK_SIZE_32;
-            break;
-        case 'q':
-            *type = PM_PACK_INTEGER;
-            *signed_type = PM_PACK_SIGNED;
-            *endian = PM_PACK_NATIVE_ENDIAN;
-            *size = PM_PACK_SIZE_64;
-            break;
-        case 'j':
-            *type = PM_PACK_INTEGER;
-            *signed_type = PM_PACK_SIGNED;
-            *endian = PM_PACK_NATIVE_ENDIAN;
-            *size = PM_PACK_SIZE_P;
-            break;
-        case 'I':
-            *type = PM_PACK_INTEGER;
-            *signed_type = PM_PACK_UNSIGNED;
-            *endian = PM_PACK_NATIVE_ENDIAN;
-            *size = PM_PACK_SIZE_INT;
-            break;
-        case 'i':
-            *type = PM_PACK_INTEGER;
-            *signed_type = PM_PACK_SIGNED;
-            *endian = PM_PACK_NATIVE_ENDIAN;
-            *size = PM_PACK_SIZE_INT;
-            break;
-        case 'n':
-            *type = PM_PACK_INTEGER;
-            *signed_type = PM_PACK_UNSIGNED;
-            *endian = PM_PACK_BIG_ENDIAN;
-            *size = PM_PACK_SIZE_16;
-            length_changed_allowed = false;
-            break;
-        case 'N':
-            *type = PM_PACK_INTEGER;
-            *signed_type = PM_PACK_UNSIGNED;
-            *endian = PM_PACK_BIG_ENDIAN;
-            *size = PM_PACK_SIZE_32;
-            length_changed_allowed = false;
-            break;
-        case 'v':
-            *type = PM_PACK_INTEGER;
-            *signed_type = PM_PACK_UNSIGNED;
-            *endian = PM_PACK_LITTLE_ENDIAN;
-            *size = PM_PACK_SIZE_16;
-            length_changed_allowed = false;
-            break;
-        case 'V':
-            *type = PM_PACK_INTEGER;
-            *signed_type = PM_PACK_UNSIGNED;
-            *endian = PM_PACK_LITTLE_ENDIAN;
-            *size = PM_PACK_SIZE_32;
-            length_changed_allowed = false;
-            break;
-        case 'U':
-            *type = PM_PACK_UTF8;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            break;
-        case 'w':
-            *type = PM_PACK_BER;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            break;
-        case 'D':
-        case 'd':
-            *type = PM_PACK_FLOAT;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_NATIVE_ENDIAN;
-            *size = PM_PACK_SIZE_64;
-            break;
-        case 'F':
-        case 'f':
-            *type = PM_PACK_FLOAT;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_NATIVE_ENDIAN;
-            *size = PM_PACK_SIZE_32;
-            break;
-        case 'E':
-            *type = PM_PACK_FLOAT;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_LITTLE_ENDIAN;
-            *size = PM_PACK_SIZE_64;
-            break;
-        case 'e':
-            *type = PM_PACK_FLOAT;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_LITTLE_ENDIAN;
-            *size = PM_PACK_SIZE_32;
-            break;
-        case 'G':
-            *type = PM_PACK_FLOAT;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_BIG_ENDIAN;
-            *size = PM_PACK_SIZE_64;
-            break;
-        case 'g':
-            *type = PM_PACK_FLOAT;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_BIG_ENDIAN;
-            *size = PM_PACK_SIZE_32;
-            break;
-        case 'A':
-            *type = PM_PACK_STRING_SPACE_PADDED;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            break;
-        case 'a':
-            *type = PM_PACK_STRING_NULL_PADDED;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            break;
-        case 'Z':
-            *type = PM_PACK_STRING_NULL_TERMINATED;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            break;
-        case 'B':
-            *type = PM_PACK_STRING_MSB;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            break;
-        case 'b':
-            *type = PM_PACK_STRING_LSB;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            break;
-        case 'H':
-            *type = PM_PACK_STRING_HEX_HIGH;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            break;
-        case 'h':
-            *type = PM_PACK_STRING_HEX_LOW;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            break;
-        case 'u':
-            *type = PM_PACK_STRING_UU;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            break;
-        case 'M':
-            *type = PM_PACK_STRING_MIME;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            break;
-        case 'm':
-            *type = PM_PACK_STRING_BASE64;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            break;
-        case 'P':
-            *type = PM_PACK_STRING_FIXED;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            break;
-        case 'p':
-            *type = PM_PACK_STRING_POINTER;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            break;
-        case '@':
-            *type = PM_PACK_MOVE;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            break;
-        case 'X':
-            *type = PM_PACK_BACK;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            break;
-        case 'x':
-            *type = PM_PACK_NULL;
-            *signed_type = PM_PACK_SIGNED_NA;
-            *endian = PM_PACK_ENDIAN_NA;
-            *size = PM_PACK_SIZE_NA;
-            break;
-        case '%':
-            return PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE;
-        default:
-            return PM_PACK_ERROR_UNKNOWN_DIRECTIVE;
-    }
-
-    bool explicit_endian = false;
-
-    while (*format < format_end) {
-        switch (**format) {
-            case '_':
-            case '!':
-                (*format)++;
-                if (*type != PM_PACK_INTEGER || !length_changed_allowed) {
-                    return PM_PACK_ERROR_BANG_NOT_ALLOWED;
-                }
-                switch (*size) {
-                    case PM_PACK_SIZE_SHORT:
-                    case PM_PACK_SIZE_INT:
-                    case PM_PACK_SIZE_LONG:
-                    case PM_PACK_SIZE_LONG_LONG:
-                        break;
-                    case PM_PACK_SIZE_16:
-                        *size = PM_PACK_SIZE_SHORT;
-                        break;
-                    case PM_PACK_SIZE_32:
-                        *size = PM_PACK_SIZE_LONG;
-                        break;
-                    case PM_PACK_SIZE_64:
-                        *size = PM_PACK_SIZE_LONG_LONG;
-                        break;
-                    case PM_PACK_SIZE_P:
-                        break;
-                    default:
-                        return PM_PACK_ERROR_BANG_NOT_ALLOWED;
-                }
-                break;
-            case '<':
-                (*format)++;
-                if (explicit_endian) {
-                    return PM_PACK_ERROR_DOUBLE_ENDIAN;
-                }
-                *endian = PM_PACK_LITTLE_ENDIAN;
-                explicit_endian = true;
-                break;
-            case '>':
-                (*format)++;
-                if (explicit_endian) {
-                    return PM_PACK_ERROR_DOUBLE_ENDIAN;
-                }
-                *endian = PM_PACK_BIG_ENDIAN;
-                explicit_endian = true;
-                break;
-            default:
-                goto exit_modifier_loop;
-        }
-    }
-
-exit_modifier_loop:
-
-    if (variant == PM_PACK_VARIANT_UNPACK && *type == PM_PACK_MOVE) {
-        *length = 0;
-    }
-
-    if (*format < format_end) {
-        if (**format == '*') {
-            switch (*type) {
-                case PM_PACK_NULL:
-                case PM_PACK_BACK:
-                    switch (variant) {
-                        case PM_PACK_VARIANT_PACK:
-                            *length_type = PM_PACK_LENGTH_FIXED;
-                            break;
-                        case PM_PACK_VARIANT_UNPACK:
-                            *length_type = PM_PACK_LENGTH_MAX;
-                            break;
-                    }
-                    *length = 0;
-                    break;
-
-                case PM_PACK_MOVE:
-                    switch (variant) {
-                        case PM_PACK_VARIANT_PACK:
-                            *length_type = PM_PACK_LENGTH_FIXED;
-                            break;
-                        case PM_PACK_VARIANT_UNPACK:
-                            *length_type = PM_PACK_LENGTH_RELATIVE;
-                            break;
-                    }
-                    *length = 0;
-                    break;
-
-                case PM_PACK_STRING_UU:
-                    *length_type = PM_PACK_LENGTH_FIXED;
-                    *length = 0;
-                    break;
-
-                case PM_PACK_STRING_FIXED:
-                    switch (variant) {
-                        case PM_PACK_VARIANT_PACK:
-                            *length_type = PM_PACK_LENGTH_FIXED;
-                            *length = 1;
-                            break;
-                        case PM_PACK_VARIANT_UNPACK:
-                            *length_type = PM_PACK_LENGTH_MAX;
-                            *length = 0;
-                            break;
-                    }
-                    break;
-
-                case PM_PACK_STRING_MIME:
-                case PM_PACK_STRING_BASE64:
-                    *length_type = PM_PACK_LENGTH_FIXED;
-                    *length = 1;
-                    break;
-
-                default:
-                    *length_type = PM_PACK_LENGTH_MAX;
-                    *length = 0;
-                    break;
-            }
-
-            (*format)++;
-        } else if (**format >= '0' && **format <= '9') {
-            errno = 0;
-            *length_type = PM_PACK_LENGTH_FIXED;
-            #if UINTMAX_MAX < UINT64_MAX
-                #error "prism's design assumes uintmax_t is at least as large as uint64_t"
-            #endif
-            uintmax_t length_max = strtoumaxc(format);
-            if (errno || length_max > UINT64_MAX) {
-                return PM_PACK_ERROR_LENGTH_TOO_BIG;
-            }
-            *length = (uint64_t) length_max;
-        }
-    }
-
-    switch (*type) {
-        case PM_PACK_UTF8:
-            /* if encoding is US-ASCII, upgrade to UTF-8 */
-            if (*encoding == PM_PACK_ENCODING_US_ASCII) {
-                *encoding = PM_PACK_ENCODING_UTF_8;
-            }
-            break;
-        case PM_PACK_STRING_MIME:
-        case PM_PACK_STRING_BASE64:
-        case PM_PACK_STRING_UU:
-            /* keep US-ASCII (do nothing) */
-            break;
-        default:
-            /* fall back to BINARY */
-            *encoding = PM_PACK_ENCODING_ASCII_8BIT;
-            break;
-    }
-
-    return PM_PACK_OK;
-}
-
-PRISM_EXPORTED_FUNCTION size_t
-pm_size_to_native(pm_pack_size size) {
-    switch (size) {
-        case PM_PACK_SIZE_SHORT:
-            return sizeof(short);
-        case PM_PACK_SIZE_INT:
-            return sizeof(int);
-        case PM_PACK_SIZE_LONG:
-            return sizeof(long);
-        case PM_PACK_SIZE_LONG_LONG:
-            return sizeof(long long);
-        case PM_PACK_SIZE_8:
-            return 1;
-        case PM_PACK_SIZE_16:
-            return 2;
-        case PM_PACK_SIZE_32:
-            return 4;
-        case PM_PACK_SIZE_64:
-            return 8;
-        case PM_PACK_SIZE_P:
-            return sizeof(void *);
-        default:
-            return 0;
-    }
-}
-
-#endif
diff --git a/prism/pack.h b/prism/pack.h
deleted file mode 100644
index 0b0b4b19cc..0000000000
--- a/prism/pack.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/**
- * @file pack.h
- *
- * A pack template string parser.
- */
-#ifndef PRISM_PACK_H
-#define PRISM_PACK_H
-
-#include "prism/defines.h"
-
-// We optionally support parsing String#pack templates. For systems that don't
-// want or need this functionality, it can be turned off with the
-// PRISM_EXCLUDE_PACK define.
-#ifdef PRISM_EXCLUDE_PACK
-
-void pm_pack_parse(void);
-
-#else
-
-#include <stdint.h>
-#include <stdlib.h>
-
-/** The version of the pack template language that we are parsing. */
-typedef enum pm_pack_version {
-    PM_PACK_VERSION_3_2_0
-} pm_pack_version;
-
-/** The type of pack template we are parsing. */
-typedef enum pm_pack_variant {
-    PM_PACK_VARIANT_PACK,
-    PM_PACK_VARIANT_UNPACK
-} pm_pack_variant;
-
-/** A directive within the pack template. */
-typedef enum pm_pack_type {
-    PM_PACK_SPACE,
-    PM_PACK_COMMENT,
-    PM_PACK_INTEGER,
-    PM_PACK_UTF8,
-    PM_PACK_BER,
-    PM_PACK_FLOAT,
-    PM_PACK_STRING_SPACE_PADDED,
-    PM_PACK_STRING_NULL_PADDED,
-    PM_PACK_STRING_NULL_TERMINATED,
-    PM_PACK_STRING_MSB,
-    PM_PACK_STRING_LSB,
-    PM_PACK_STRING_HEX_HIGH,
-    PM_PACK_STRING_HEX_LOW,
-    PM_PACK_STRING_UU,
-    PM_PACK_STRING_MIME,
-    PM_PACK_STRING_BASE64,
-    PM_PACK_STRING_FIXED,
-    PM_PACK_STRING_POINTER,
-    PM_PACK_MOVE,
-    PM_PACK_BACK,
-    PM_PACK_NULL,
-    PM_PACK_END
-} pm_pack_type;
-
-/** The signness of a pack directive. */
-typedef enum pm_pack_signed {
-    PM_PACK_UNSIGNED,
-    PM_PACK_SIGNED,
-    PM_PACK_SIGNED_NA
-} pm_pack_signed;
-
-/** The endianness of a pack directive. */
-typedef enum pm_pack_endian {
-    PM_PACK_AGNOSTIC_ENDIAN,
-    PM_PACK_LITTLE_ENDIAN,      // aka 'VAX', or 'V'
-    PM_PACK_BIG_ENDIAN,         // aka 'network', or 'N'
-    PM_PACK_NATIVE_ENDIAN,
-    PM_PACK_ENDIAN_NA
-} pm_pack_endian;
-
-/** The size of an integer pack directive. */
-typedef enum pm_pack_size {
-    PM_PACK_SIZE_SHORT,
-    PM_PACK_SIZE_INT,
-    PM_PACK_SIZE_LONG,
-    PM_PACK_SIZE_LONG_LONG,
-    PM_PACK_SIZE_8,
-    PM_PACK_SIZE_16,
-    PM_PACK_SIZE_32,
-    PM_PACK_SIZE_64,
-    PM_PACK_SIZE_P,
-    PM_PACK_SIZE_NA
-} pm_pack_size;
-
-/** The type of length of a pack directive. */
-typedef enum pm_pack_length_type {
-    PM_PACK_LENGTH_FIXED,
-    PM_PACK_LENGTH_MAX,
-    PM_PACK_LENGTH_RELATIVE,  // special case for unpack @*
-    PM_PACK_LENGTH_NA
-} pm_pack_length_type;
-
-/** The type of encoding for a pack template string. */
-typedef enum pm_pack_encoding {
-    PM_PACK_ENCODING_START,
-    PM_PACK_ENCODING_ASCII_8BIT,
-    PM_PACK_ENCODING_US_ASCII,
-    PM_PACK_ENCODING_UTF_8
-} pm_pack_encoding;
-
-/** The result of parsing a pack template. */
-typedef enum pm_pack_result {
-    PM_PACK_OK,
-    PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE,
-    PM_PACK_ERROR_UNKNOWN_DIRECTIVE,
-    PM_PACK_ERROR_LENGTH_TOO_BIG,
-    PM_PACK_ERROR_BANG_NOT_ALLOWED,
-    PM_PACK_ERROR_DOUBLE_ENDIAN
-} pm_pack_result;
-
-/**
- * Parse a single directive from a pack or unpack format string.
- *
- * @param variant (in) pack or unpack
- * @param format (in, out) the start of the next directive to parse on calling,
- *     and advanced beyond the parsed directive on return, or as much of it as
- *     was consumed until an error was encountered
- * @param format_end (in) the end of the format string
- * @param type (out) the type of the directive
- * @param signed_type (out) whether the value is signed
- * @param endian (out) the endianness of the value
- * @param size (out) the size of the value
- * @param length_type (out) what kind of length is specified
- * @param length (out) the length of the directive
- * @param encoding (in, out) takes the current encoding of the string which
- *     would result from parsing the whole format string, and returns a possibly
- *     changed directive - the encoding should be `PM_PACK_ENCODING_START` when
- *     pm_pack_parse is called for the first directive in a format string
- *
- * @return `PM_PACK_OK` on success or `PM_PACK_ERROR_*` on error
- * @note Consult Ruby documentation for the meaning of directives.
- */
-PRISM_EXPORTED_FUNCTION pm_pack_result
-pm_pack_parse(
-    pm_pack_variant variant,
-    const char **format,
-    const char *format_end,
-    pm_pack_type *type,
-    pm_pack_signed *signed_type,
-    pm_pack_endian *endian,
-    pm_pack_size *size,
-    pm_pack_length_type *length_type,
-    uint64_t *length,
-    pm_pack_encoding *encoding
-);
-
-/**
- * Prism abstracts sizes away from the native system - this converts an abstract
- * size to a native size.
- *
- * @param size The abstract size to convert.
- * @return The native size.
- */
-PRISM_EXPORTED_FUNCTION size_t pm_size_to_native(pm_pack_size size);
-
-#endif
-
-#endif
diff --git a/prism/parser.c b/prism/parser.c
new file mode 100644
index 0000000000..415cd31984
--- /dev/null
+++ b/prism/parser.c
@@ -0,0 +1,302 @@
+#include "prism/internal/parser.h"
+
+#include "prism/internal/allocator.h"
+#include "prism/internal/comments.h"
+#include "prism/internal/diagnostic.h"
+#include "prism/internal/encoding.h"
+#include "prism/internal/magic_comments.h"
+
+#include <stdlib.h>
+
+/**
+ * Register a callback that will be called whenever prism changes the encoding
+ * it is using to parse based on the magic comment.
+ */
+void
+pm_parser_encoding_changed_callback_set(pm_parser_t *parser, pm_encoding_changed_callback_t callback) {
+    parser->encoding_changed_callback = callback;
+}
+
+/**
+ * Register a callback that will be called whenever a token is lexed.
+ */
+void
+pm_parser_lex_callback_set(pm_parser_t *parser, pm_lex_callback_t callback, void *data) {
+    parser->lex_callback.callback = callback;
+    parser->lex_callback.data = data;
+}
+
+/**
+ * Returns the opaque data that is passed to the lex callback when it is called.
+ */
+void *
+pm_parser_lex_callback_data(const pm_parser_t *parser) {
+    return parser->lex_callback.data;
+}
+
+/**
+ * Returns the raw pointer to the start of the source that is being parsed.
+ */
+const uint8_t *
+pm_parser_start(const pm_parser_t *parser) {
+    return parser->start;
+}
+
+/**
+ * Returns the raw pointer to the end of the source that is being parsed.
+ */
+const uint8_t *
+pm_parser_end(const pm_parser_t *parser) {
+    return parser->end;
+}
+
+/**
+ * Returns the line that the parser was considered to have started on.
+ *
+ * @param parser the parser whose start line we want to get
+ * @return the line that the parser was considered to have started on
+ */
+int32_t
+pm_parser_start_line(const pm_parser_t *parser) {
+    return parser->start_line;
+}
+
+/**
+ * Returns the name of the encoding that is being used to parse the source.
+ */
+const char *
+pm_parser_encoding_name(const pm_parser_t *parser) {
+    return parser->encoding->name;
+}
+
+/**
+ * Returns the width of the character at the given pointer in the encoding that
+ * is being used to parse the source.
+ */
+size_t
+pm_parser_encoding_char_width(const pm_parser_t *parser, const uint8_t *start, ptrdiff_t remaining) {
+    return parser->encoding->char_width(start, remaining);
+}
+
+/**
+ * Returns whether or not the parser is using the US-ASCII encoding.
+ */
+bool
+pm_parser_encoding_us_ascii(const pm_parser_t *parser) {
+    return parser->encoding == PM_ENCODING_US_ASCII_ENTRY;
+}
+
+/**
+ * Returns the filepath that is being used to parse the source.
+ */
+const pm_string_t *
+pm_parser_filepath(const pm_parser_t *parser) {
+    return &parser->filepath;
+}
+
+/**
+ * Find a constant in the parser's constant pool. Returns the id of the
+ * constant, or 0 if the constant is not found.
+ */
+pm_constant_id_t
+pm_parser_constant_find(const pm_parser_t *parser, const uint8_t *start, size_t length) {
+    return pm_constant_pool_find(&parser->constant_pool, start, length);
+}
+
+/**
+ * Returns the frozen string literal value of the parser.
+ */
+int8_t
+pm_parser_frozen_string_literal(const pm_parser_t *parser) {
+    return parser->frozen_string_literal;
+}
+
+/**
+ * Returns the line offsets that are associated with the given parser.
+ *
+ * @param parser the parser whose line offsets we want to get
+ * @return the line offsets that are associated with the given parser
+ */
+const pm_line_offset_list_t *
+pm_parser_line_offsets(const pm_parser_t *parser) {
+    return &parser->line_offsets;
+}
+
+/**
+ * Returns the location of the __DATA__ section that is associated with the
+ * given parser, if it exists.
+ */
+const pm_location_t *
+pm_parser_data_loc(const pm_parser_t *parser) {
+    return &parser->data_loc;
+}
+
+/**
+ * Returns whether the given parser is continuable, meaning that it could become
+ * valid if more input were appended, as opposed to being definitively invalid.
+ */
+bool
+pm_parser_continuable(const pm_parser_t *parser) {
+    return parser->continuable;
+}
+
+/**
+ * Returns the lex state of the parser. Note that this is an internal detail,
+ * and we are purposefully not returning an instance of the internal enum that
+ * we use to track this. This is only exposed because we need it for some very
+ * niche use cases. Most consumers should avoid this function.
+ */
+int
+pm_parser_lex_state(const pm_parser_t *parser) {
+    return (int) parser->lex_state;
+}
+
+/**
+ * Returns the location associated with the given comment.
+ */
+pm_location_t
+pm_comment_location(const pm_comment_t *comment) {
+    return comment->location;
+}
+
+/**
+ * Returns the type associated with the given comment.
+ */
+pm_comment_type_t
+pm_comment_type(const pm_comment_t *comment) {
+    return comment->type;
+}
+
+/**
+ * Returns the number of comments associated with the given parser.
+ */
+size_t
+pm_parser_comments_size(const pm_parser_t *parser) {
+    return parser->comment_list.size;
+}
+
+/**
+ * Iterates over the comments associated with the given parser and calls the
+ * given callback for each comment.
+ */
+void
+pm_parser_comments_each(const pm_parser_t *parser, pm_comment_callback_t callback, void *data) {
+    const pm_list_node_t *current = parser->comment_list.head;
+    while (current != NULL) {
+        const pm_comment_t *comment = (const pm_comment_t *) current;
+        callback(comment, data);
+        current = current->next;
+    }
+}
+
+/**
+ * Returns the location associated with the given magic comment key.
+ */
+pm_location_t
+pm_magic_comment_key(const pm_magic_comment_t *magic_comment) {
+    return magic_comment->key;
+}
+
+/**
+ * Returns the location associated with the given magic comment value.
+ */
+pm_location_t
+pm_magic_comment_value(const pm_magic_comment_t *magic_comment) {
+    return magic_comment->value;
+}
+
+/**
+ * Returns the number of magic comments associated with the given parser.
+ */
+size_t
+pm_parser_magic_comments_size(const pm_parser_t *parser) {
+    return parser->magic_comment_list.size;
+}
+
+/**
+ * Iterates over the magic comments associated with the given parser and calls
+ * the given callback for each magic comment.
+ */
+void
+pm_parser_magic_comments_each(const pm_parser_t *parser, pm_magic_comment_callback_t callback, void *data) {
+    const pm_list_node_t *current = parser->magic_comment_list.head;
+    while (current != NULL) {
+        const pm_magic_comment_t *magic_comment = (const pm_magic_comment_t *) current;
+        callback(magic_comment, data);
+        current = current->next;
+    }
+}
+
+/**
+ * Returns the number of errors associated with the given parser.
+ */
+size_t
+pm_parser_errors_size(const pm_parser_t *parser) {
+    return parser->error_list.size;
+}
+
+/**
+ * Returns the number of warnings associated with the given parser.
+ */
+size_t
+pm_parser_warnings_size(const pm_parser_t *parser) {
+    return parser->warning_list.size;
+}
+
+static inline void
+pm_parser_diagnostics_each(const pm_list_t *list, pm_diagnostic_callback_t callback, void *data) {
+    const pm_list_node_t *current = list->head;
+    while (current != NULL) {
+        const pm_diagnostic_t *diagnostic = (const pm_diagnostic_t *) current;
+        callback(diagnostic, data);
+        current = current->next;
+    }
+}
+
+/**
+ * Iterates over the errors associated with the given parser and calls the
+ * given callback for each error.
+ */
+void
+pm_parser_errors_each(const pm_parser_t *parser, pm_diagnostic_callback_t callback, void *data) {
+    pm_parser_diagnostics_each(&parser->error_list, callback, data);
+}
+
+/**
+ * Iterates over the warnings associated with the given parser and calls the
+ * given callback for each warning.
+ */
+void
+pm_parser_warnings_each(const pm_parser_t *parser, pm_diagnostic_callback_t callback, void *data) {
+    pm_parser_diagnostics_each(&parser->warning_list, callback, data);
+}
+
+/**
+ * Returns the number of constants in the constant pool associated with the
+ * given parser.
+ */
+size_t
+pm_parser_constants_size(const pm_parser_t *parser) {
+    return parser->constant_pool.size;
+}
+
+/**
+ * Iterates over the constants in the constant pool associated with the given
+ * parser and calls the given callback for each constant.
+ */
+void
+pm_parser_constants_each(const pm_parser_t *parser, pm_constant_callback_t callback, void *data) {
+    for (uint32_t index = 0; index < parser->constant_pool.size; index++) {
+        const pm_constant_t *constant = &parser->constant_pool.constants[index];
+        callback(constant, data);
+    }
+}
+
+/**
+ * Returns a pointer to the constant at the given id in the constant pool
+ * associated with the given parser.
+ */
+const pm_constant_t *
+pm_parser_constant(const pm_parser_t *parser, pm_constant_id_t constant_id) {
+    return pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id);
+}
diff --git a/prism/parser.h b/prism/parser.h
index 992729d655..2c8c4b3a7a 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -6,928 +6,343 @@
 #ifndef PRISM_PARSER_H
 #define PRISM_PARSER_H
 
-#include "prism/defines.h"
+#include "prism/compiler/nodiscard.h"
+#include "prism/compiler/nonnull.h"
+
 #include "prism/ast.h"
-#include "prism/encoding.h"
+#include "prism/comments.h"
+#include "prism/diagnostic.h"
+#include "prism/line_offset_list.h"
+#include "prism/magic_comments.h"
 #include "prism/options.h"
-#include "prism/static_literals.h"
-#include "prism/util/pm_constant_pool.h"
-#include "prism/util/pm_list.h"
-#include "prism/util/pm_newline_list.h"
-#include "prism/util/pm_string.h"
-
-#include <stdbool.h>
-
-/**
- * This enum provides various bits that represent different kinds of states that
- * the lexer can track. This is used to determine which kind of token to return
- * based on the context of the parser.
- */
-typedef enum {
-    PM_LEX_STATE_BIT_BEG,
-    PM_LEX_STATE_BIT_END,
-    PM_LEX_STATE_BIT_ENDARG,
-    PM_LEX_STATE_BIT_ENDFN,
-    PM_LEX_STATE_BIT_ARG,
-    PM_LEX_STATE_BIT_CMDARG,
-    PM_LEX_STATE_BIT_MID,
-    PM_LEX_STATE_BIT_FNAME,
-    PM_LEX_STATE_BIT_DOT,
-    PM_LEX_STATE_BIT_CLASS,
-    PM_LEX_STATE_BIT_LABEL,
-    PM_LEX_STATE_BIT_LABELED,
-    PM_LEX_STATE_BIT_FITEM
-} pm_lex_state_bit_t;
-
-/**
- * This enum combines the various bits from the above enum into individual
- * values that represent the various states of the lexer.
- */
-typedef enum {
-    PM_LEX_STATE_NONE = 0,
-    PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG),
-    PM_LEX_STATE_END = (1 << PM_LEX_STATE_BIT_END),
-    PM_LEX_STATE_ENDARG = (1 << PM_LEX_STATE_BIT_ENDARG),
-    PM_LEX_STATE_ENDFN = (1 << PM_LEX_STATE_BIT_ENDFN),
-    PM_LEX_STATE_ARG = (1 << PM_LEX_STATE_BIT_ARG),
-    PM_LEX_STATE_CMDARG = (1 << PM_LEX_STATE_BIT_CMDARG),
-    PM_LEX_STATE_MID = (1 << PM_LEX_STATE_BIT_MID),
-    PM_LEX_STATE_FNAME = (1 << PM_LEX_STATE_BIT_FNAME),
-    PM_LEX_STATE_DOT = (1 << PM_LEX_STATE_BIT_DOT),
-    PM_LEX_STATE_CLASS = (1 << PM_LEX_STATE_BIT_CLASS),
-    PM_LEX_STATE_LABEL = (1 << PM_LEX_STATE_BIT_LABEL),
-    PM_LEX_STATE_LABELED = (1 << PM_LEX_STATE_BIT_LABELED),
-    PM_LEX_STATE_FITEM = (1 << PM_LEX_STATE_BIT_FITEM),
-    PM_LEX_STATE_BEG_ANY = PM_LEX_STATE_BEG | PM_LEX_STATE_MID | PM_LEX_STATE_CLASS,
-    PM_LEX_STATE_ARG_ANY = PM_LEX_STATE_ARG | PM_LEX_STATE_CMDARG,
-    PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN
-} pm_lex_state_t;
-
-/**
- * The type of quote that a heredoc uses.
- */
-typedef enum {
-    PM_HEREDOC_QUOTE_NONE,
-    PM_HEREDOC_QUOTE_SINGLE = '\'',
-    PM_HEREDOC_QUOTE_DOUBLE = '"',
-    PM_HEREDOC_QUOTE_BACKTICK = '`',
-} pm_heredoc_quote_t;
-
-/**
- * The type of indentation that a heredoc uses.
- */
-typedef enum {
-    PM_HEREDOC_INDENT_NONE,
-    PM_HEREDOC_INDENT_DASH,
-    PM_HEREDOC_INDENT_TILDE,
-} pm_heredoc_indent_t;
-
-/**
- * All of the information necessary to store to lexing a heredoc.
- */
-typedef struct {
-    /** A pointer to the start of the heredoc identifier. */
-    const uint8_t *ident_start;
-
-    /** The length of the heredoc identifier. */
-    size_t ident_length;
-
-    /** The type of quote that the heredoc uses. */
-    pm_heredoc_quote_t quote;
-
-    /** The type of indentation that the heredoc uses. */
-    pm_heredoc_indent_t indent;
-} pm_heredoc_lex_mode_t;
-
-/**
- * When lexing Ruby source, the lexer has a small amount of state to tell which
- * kind of token it is currently lexing. For example, when we find the start of
- * a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
- * that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
- * are found as part of a string.
- */
-typedef struct pm_lex_mode {
-    /** The type of this lex mode. */
-    enum {
-        /** This state is used when any given token is being lexed. */
-        PM_LEX_DEFAULT,
-
-        /**
-         * This state is used when we're lexing as normal but inside an embedded
-         * expression of a string.
-         */
-        PM_LEX_EMBEXPR,
-
-        /**
-         * This state is used when we're lexing a variable that is embedded
-         * directly inside of a string with the # shorthand.
-         */
-        PM_LEX_EMBVAR,
-
-        /** This state is used when you are inside the content of a heredoc. */
-        PM_LEX_HEREDOC,
-
-        /**
-         * This state is used when we are lexing a list of tokens, as in a %w
-         * word list literal or a %i symbol list literal.
-         */
-        PM_LEX_LIST,
-
-        /**
-         * This state is used when a regular expression has been begun and we
-         * are looking for the terminator.
-         */
-        PM_LEX_REGEXP,
-
-        /**
-         * This state is used when we are lexing a string or a string-like
-         * token, as in string content with either quote or an xstring.
-         */
-        PM_LEX_STRING
-    } mode;
-
-    /** The data associated with this type of lex mode. */
-    union {
-        struct {
-            /** This keeps track of the nesting level of the list. */
-            size_t nesting;
-
-            /** Whether or not interpolation is allowed in this list. */
-            bool interpolation;
-
-            /**
-             * When lexing a list, it takes into account balancing the
-             * terminator if the terminator is one of (), [], {}, or <>.
-             */
-            uint8_t incrementor;
-
-            /** This is the terminator of the list literal. */
-            uint8_t terminator;
-
-            /**
-             * This is the character set that should be used to delimit the
-             * tokens within the list.
-             */
-            uint8_t breakpoints[11];
-        } list;
-
-        struct {
-            /**
-             * This keeps track of the nesting level of the regular expression.
-             */
-            size_t nesting;
-
-            /**
-             * When lexing a regular expression, it takes into account balancing
-             * the terminator if the terminator is one of (), [], {}, or <>.
-             */
-            uint8_t incrementor;
-
-            /** This is the terminator of the regular expression. */
-            uint8_t terminator;
-
-            /**
-             * This is the character set that should be used to delimit the
-             * tokens within the regular expression.
-             */
-            uint8_t breakpoints[7];
-        } regexp;
-
-        struct {
-            /** This keeps track of the nesting level of the string. */
-            size_t nesting;
-
-            /** Whether or not interpolation is allowed in this string. */
-            bool interpolation;
-
-            /**
-             * Whether or not at the end of the string we should allow a :,
-             * which would indicate this was a dynamic symbol instead of a
-             * string.
-             */
-            bool label_allowed;
-
-            /**
-             * When lexing a string, it takes into account balancing the
-             * terminator if the terminator is one of (), [], {}, or <>.
-             */
-            uint8_t incrementor;
-
-            /**
-             * This is the terminator of the string. It is typically either a
-             * single or double quote.
-             */
-            uint8_t terminator;
-
-            /**
-             * This is the character set that should be used to delimit the
-             * tokens within the string.
-             */
-            uint8_t breakpoints[7];
-        } string;
-
-        struct {
-            /**
-             * All of the data necessary to lex a heredoc.
-             */
-            pm_heredoc_lex_mode_t base;
-
-            /**
-             * This is the pointer to the character where lexing should resume
-             * once the heredoc has been completely processed.
-             */
-            const uint8_t *next_start;
-
-            /**
-             * This is used to track the amount of common whitespace on each
-             * line so that we know how much to dedent each line in the case of
-             * a tilde heredoc.
-             */
-            size_t *common_whitespace;
-
-            /** True if the previous token ended with a line continuation. */
-            bool line_continuation;
-        } heredoc;
-    } as;
-
-    /** The previous lex state so that it knows how to pop. */
-    struct pm_lex_mode *prev;
-} pm_lex_mode_t;
-
-/**
- * We pre-allocate a certain number of lex states in order to avoid having to
- * call malloc too many times while parsing. You really shouldn't need more than
- * this because you only really nest deeply when doing string interpolation.
- */
-#define PM_LEX_STACK_SIZE 4
 
 /**
  * The parser used to parse Ruby source.
  */
-typedef struct pm_parser pm_parser_t;
+typedef struct pm_parser_t pm_parser_t;
 
 /**
- * While parsing, we keep track of a stack of contexts. This is helpful for
- * error recovery so that we can pop back to a previous context when we hit a
- * token that is understood by a parent context but not by the current context.
+ * Allocate and initialize a parser with the given start and end pointers.
+ *
+ * @param arena The arena to use for all AST-lifetime allocations. It is caller-
+ *     owned and must outlive the parser.
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param options The optional options to use when parsing. These options must
+ *     live for the whole lifetime of this parser.
+ * @returns The initialized parser. It is the responsibility of the caller to
+ *     free the parser with `pm_parser_free()`.
  */
-typedef enum {
-    /** a null context, used for returning a value from a function */
-    PM_CONTEXT_NONE = 0,
-
-    /** a begin statement */
-    PM_CONTEXT_BEGIN,
-
-    /** an ensure statement with an explicit begin */
-    PM_CONTEXT_BEGIN_ENSURE,
-
-    /** a rescue else statement with an explicit begin */
-    PM_CONTEXT_BEGIN_ELSE,
-
-    /** a rescue statement with an explicit begin */
-    PM_CONTEXT_BEGIN_RESCUE,
-
-    /** expressions in block arguments using braces */
-    PM_CONTEXT_BLOCK_BRACES,
-
-    /** expressions in block arguments using do..end */
-    PM_CONTEXT_BLOCK_KEYWORDS,
-
-    /** an ensure statement within a do..end block */
-    PM_CONTEXT_BLOCK_ENSURE,
-
-    /** a rescue else statement within a do..end block */
-    PM_CONTEXT_BLOCK_ELSE,
-
-    /** a rescue statement within a do..end block */
-    PM_CONTEXT_BLOCK_RESCUE,
-
-    /** a case when statements */
-    PM_CONTEXT_CASE_WHEN,
-
-    /** a case in statements */
-    PM_CONTEXT_CASE_IN,
-
-    /** a class declaration */
-    PM_CONTEXT_CLASS,
-
-    /** an ensure statement within a class statement */
-    PM_CONTEXT_CLASS_ENSURE,
-
-    /** a rescue else statement within a class statement */
-    PM_CONTEXT_CLASS_ELSE,
-
-    /** a rescue statement within a class statement */
-    PM_CONTEXT_CLASS_RESCUE,
-
-    /** a method definition */
-    PM_CONTEXT_DEF,
-
-    /** an ensure statement within a method definition */
-    PM_CONTEXT_DEF_ENSURE,
-
-    /** a rescue else statement within a method definition */
-    PM_CONTEXT_DEF_ELSE,
-
-    /** a rescue statement within a method definition */
-    PM_CONTEXT_DEF_RESCUE,
-
-    /** a method definition's parameters */
-    PM_CONTEXT_DEF_PARAMS,
-
-    /** a defined? expression */
-    PM_CONTEXT_DEFINED,
-
-    /** a method definition's default parameter */
-    PM_CONTEXT_DEFAULT_PARAMS,
-
-    /** an else clause */
-    PM_CONTEXT_ELSE,
-
-    /** an elsif clause */
-    PM_CONTEXT_ELSIF,
-
-    /** an interpolated expression */
-    PM_CONTEXT_EMBEXPR,
-
-    /** a for loop */
-    PM_CONTEXT_FOR,
-
-    /** a for loop's index */
-    PM_CONTEXT_FOR_INDEX,
-
-    /** an if statement */
-    PM_CONTEXT_IF,
-
-    /** a lambda expression with braces */
-    PM_CONTEXT_LAMBDA_BRACES,
-
-    /** a lambda expression with do..end */
-    PM_CONTEXT_LAMBDA_DO_END,
-
-    /** an ensure statement within a lambda expression */
-    PM_CONTEXT_LAMBDA_ENSURE,
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_parser_t * pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options) PRISM_NONNULL(1);
 
-    /** a rescue else statement within a lambda expression */
-    PM_CONTEXT_LAMBDA_ELSE,
-
-    /** a rescue statement within a lambda expression */
-    PM_CONTEXT_LAMBDA_RESCUE,
+/**
+ * Free both the memory held by the given parser and the parser itself.
+ *
+ * @param parser The parser to free.
+ */
+PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser) PRISM_NONNULL(1);
 
-    /** the predicate clause of a loop statement */
-    PM_CONTEXT_LOOP_PREDICATE,
+/**
+ * When the encoding that is being used to parse the source is changed by prism,
+ * we provide the ability here to call out to a user-defined function.
+ */
+typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
 
-    /** the top level context */
-    PM_CONTEXT_MAIN,
+/**
+ * This is the callback that is called when a token is lexed. It is passed
+ * the opaque data pointer, the parser, and the token that was lexed.
+ */
+typedef void (*pm_lex_callback_t)(pm_parser_t *parser, pm_token_t *token, void *data);
 
-    /** a module declaration */
-    PM_CONTEXT_MODULE,
+/**
+ * Register a callback that will be called whenever prism changes the encoding
+ * it is using to parse based on the magic comment.
+ *
+ * @param parser The parser to register the callback with.
+ * @param callback The callback to register.
+ */
+PRISM_EXPORTED_FUNCTION void pm_parser_encoding_changed_callback_set(pm_parser_t *parser, pm_encoding_changed_callback_t callback) PRISM_NONNULL(1);
 
-    /** an ensure statement within a module statement */
-    PM_CONTEXT_MODULE_ENSURE,
+/**
+ * Register a callback that will be called whenever a token is lexed.
+ *
+ * @param parser The parser to register the callback with.
+ * @param callback The callback to register.
+ * @param data The opaque data to pass to the callback when it is called.
+ */
+PRISM_EXPORTED_FUNCTION void pm_parser_lex_callback_set(pm_parser_t *parser, pm_lex_callback_t callback, void *data) PRISM_NONNULL(1);
 
-    /** a rescue else statement within a module statement */
-    PM_CONTEXT_MODULE_ELSE,
+/**
+ * Returns the opaque data that is passed to the lex callback when it is called.
+ *
+ * @param parser The parser whose lex callback data we want to get.
+ * @returns The opaque data that is passed to the lex callback when it is called.
+ */
+PRISM_EXPORTED_FUNCTION void * pm_parser_lex_callback_data(const pm_parser_t *parser) PRISM_NONNULL(1);
 
-    /** a rescue statement within a module statement */
-    PM_CONTEXT_MODULE_RESCUE,
+/**
+ * Returns the raw pointer to the start of the source that is being parsed.
+ *
+ * @param parser the parser whose start pointer we want to get
+ * @returns the raw pointer to the start of the source that is being parsed
+ */
+PRISM_EXPORTED_FUNCTION const uint8_t * pm_parser_start(const pm_parser_t *parser) PRISM_NONNULL(1);
 
-    /** a multiple target expression */
-    PM_CONTEXT_MULTI_TARGET,
+/**
+ * Returns the raw pointer to the end of the source that is being parsed.
+ *
+ * @param parser the parser whose end pointer we want to get
+ * @returns the raw pointer to the end of the source that is being parsed
+ */
+PRISM_EXPORTED_FUNCTION const uint8_t * pm_parser_end(const pm_parser_t *parser) PRISM_NONNULL(1);
 
-    /** a parenthesized expression */
-    PM_CONTEXT_PARENS,
+/**
+ * Returns the line that the parser was considered to have started on.
+ *
+ * @param parser the parser whose start line we want to get
+ * @returns the line that the parser was considered to have started on
+ */
+PRISM_EXPORTED_FUNCTION int32_t pm_parser_start_line(const pm_parser_t *parser) PRISM_NONNULL(1);
 
-    /** an END block */
-    PM_CONTEXT_POSTEXE,
+/**
+ * Returns the name of the encoding that is being used to parse the source.
+ *
+ * @param parser the parser whose encoding name we want to get
+ * @returns the name of the encoding that is being used to parse the source
+ */
+PRISM_EXPORTED_FUNCTION const char * pm_parser_encoding_name(const pm_parser_t *parser) PRISM_NONNULL(1);
 
-    /** a predicate inside an if/elsif/unless statement */
-    PM_CONTEXT_PREDICATE,
+/**
+ * Returns the width of the character at the given pointer in the encoding that
+ * is being used to parse the source.
+ *
+ * @param parser the parser whose encoding we want to use
+ * @param start a pointer to the start of the character
+ * @param remaining the number of bytes remaining in the source
+ * @returns the width of the character in bytes
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_parser_encoding_char_width(const pm_parser_t *parser, const uint8_t *start, ptrdiff_t remaining) PRISM_NONNULL(1, 2);
 
-    /** a BEGIN block */
-    PM_CONTEXT_PREEXE,
+/**
+ * Returns whether or not the parser is using the US-ASCII encoding.
+ *
+ * @param parser the parser to check
+ * @returns true if the parser is using US-ASCII encoding, false otherwise
+ */
+PRISM_EXPORTED_FUNCTION bool pm_parser_encoding_us_ascii(const pm_parser_t *parser) PRISM_NONNULL(1);
 
-    /** a modifier rescue clause */
-    PM_CONTEXT_RESCUE_MODIFIER,
+/**
+ * Returns the filepath that is being used to parse the source.
+ *
+ * @param parser the parser whose filepath we want to get
+ * @returns a pointer to the filepath string
+ */
+PRISM_EXPORTED_FUNCTION const pm_string_t * pm_parser_filepath(const pm_parser_t *parser) PRISM_NONNULL(1);
 
-    /** a singleton class definition */
-    PM_CONTEXT_SCLASS,
+/**
+ * Find a constant in the parser's constant pool. Returns the id of the
+ * constant, or 0 if the constant is not found.
+ *
+ * @param parser the parser whose constant pool we want to search
+ * @param start a pointer to the start of the string to search for
+ * @param length the length of the string to search for
+ * @returns the id of the constant, or 0 if the constant is not found
+ */
+PRISM_EXPORTED_FUNCTION pm_constant_id_t pm_parser_constant_find(const pm_parser_t *parser, const uint8_t *start, size_t length) PRISM_NONNULL(1, 2);
 
-    /** an ensure statement with a singleton class */
-    PM_CONTEXT_SCLASS_ENSURE,
+/**
+ * Returns the frozen string literal value of the parser, as determined by the
+ * frozen_string_literal magic comment or the option set on the parser.
+ *
+ * @param parser the parser whose frozen string literal value we want to get
+ * @returns -1 if disabled, 0 if unset, 1 if enabled
+ */
+PRISM_EXPORTED_FUNCTION int8_t pm_parser_frozen_string_literal(const pm_parser_t *parser) PRISM_NONNULL(1);
 
-    /** a rescue else statement with a singleton class */
-    PM_CONTEXT_SCLASS_ELSE,
+/**
+ * Returns the line offsets that are associated with the given parser.
+ *
+ * @param parser the parser whose line offsets we want to get
+ * @returns the line offsets that are associated with the given parser
+ */
+PRISM_EXPORTED_FUNCTION const pm_line_offset_list_t * pm_parser_line_offsets(const pm_parser_t *parser) PRISM_NONNULL(1);
 
-    /** a rescue statement with a singleton class */
-    PM_CONTEXT_SCLASS_RESCUE,
+/**
+ * Returns the location of the __END__ marker and the content that follows it
+ * (the content that is loaded into the DATA constant) for the given parser.
+ *
+ * @param parser the parser whose data location we want to get
+ * @returns the location of the __END__ marker and the content that follows
+ *     it. If it is unset, then the length will be set to 0.
+ */
+PRISM_EXPORTED_FUNCTION const pm_location_t * pm_parser_data_loc(const pm_parser_t *parser) PRISM_NONNULL(1);
 
-    /** a ternary expression */
-    PM_CONTEXT_TERNARY,
+/**
+ * Returns whether the given parser is continuable, meaning that it could become
+ * valid if more input were appended, as opposed to being definitively invalid.
+ *
+ * @param parser the parser whose continuable status we want to get
+ * @returns whether the given parser is continuable
+ */
+PRISM_EXPORTED_FUNCTION bool pm_parser_continuable(const pm_parser_t *parser) PRISM_NONNULL(1);
 
-    /** an unless statement */
-    PM_CONTEXT_UNLESS,
+/**
+ * Returns the lex state of the parser. Note that this is an internal detail,
+ * and we are purposefully not returning an instance of the internal enum that
+ * we use to track this. This is only exposed because we need it for some very
+ * niche use cases. Most consumers should avoid this function.
+ *
+ * @param parser the parser whose lex state we want to get
+ * @returns the lex state of the parser
+ */
+PRISM_EXPORTED_FUNCTION int pm_parser_lex_state(const pm_parser_t *parser) PRISM_NONNULL(1);
 
-    /** an until statement */
-    PM_CONTEXT_UNTIL,
+/**
+ * Returns the number of comments associated with the given parser.
+ *
+ * @param parser the parser whose comments we want to get the size of
+ * @returns the number of comments associated with the given parser
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_parser_comments_size(const pm_parser_t *parser) PRISM_NONNULL(1);
 
-    /** a while statement */
-    PM_CONTEXT_WHILE,
-} pm_context_t;
+/**
+ * A callback function that can be used to process comments found while parsing.
+ */
+typedef void (*pm_comment_callback_t)(const pm_comment_t *comment, void *data);
 
-/** This is a node in a linked list of contexts. */
-typedef struct pm_context_node {
-    /** The context that this node represents. */
-    pm_context_t context;
+/**
+ * Iterates over the comments associated with the given parser and calls the
+ * given callback for each comment.
+ *
+ * @param parser the parser whose comments we want to iterate over
+ * @param callback the callback function to call for each comment. This function
+ *     will be passed a pointer to the comment and the data parameter passed to
+ *     this function.
+ * @param data the data to pass to the callback function for each comment. This
+ *     can be NULL if no data needs to be passed to the callback function.
+ */
+PRISM_EXPORTED_FUNCTION void pm_parser_comments_each(const pm_parser_t *parser, pm_comment_callback_t callback, void *data) PRISM_NONNULL(1);
 
-    /** A pointer to the previous context in the linked list. */
-    struct pm_context_node *prev;
-} pm_context_node_t;
+/**
+ * Returns the number of magic comments associated with the given parser.
+ *
+ * @param parser the parser whose magic comments we want to get the size of
+ * @returns the number of magic comments associated with the given parser
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_parser_magic_comments_size(const pm_parser_t *parser) PRISM_NONNULL(1);
 
-/** This is the type of a comment that we've found while parsing. */
-typedef enum {
-    PM_COMMENT_INLINE,
-    PM_COMMENT_EMBDOC
-} pm_comment_type_t;
+/**
+ * A callback function that can be used to process magic comments found while parsing.
+ */
+typedef void (*pm_magic_comment_callback_t)(const pm_magic_comment_t *magic_comment, void *data);
 
 /**
- * This is a node in the linked list of comments that we've found while parsing.
+ * Iterates over the magic comments associated with the given parser and calls the
+ * given callback for each magic comment.
  *
- * @extends pm_list_node_t
+ * @param parser the parser whose magic comments we want to iterate over
+ * @param callback the callback function to call for each magic comment. This
+ *     function will be passed a pointer to the magic comment and the data
+ *     parameter passed to this function.
+ * @param data the data to pass to the callback function for each magic comment.
+ *     This can be NULL if no data needs to be passed to the callback function.
  */
-typedef struct pm_comment {
-    /** The embedded base node. */
-    pm_list_node_t node;
+PRISM_EXPORTED_FUNCTION void pm_parser_magic_comments_each(const pm_parser_t *parser, pm_magic_comment_callback_t callback, void *data) PRISM_NONNULL(1);
 
-    /** The location of the comment in the source. */
-    pm_location_t location;
+/**
+ * Returns the number of errors associated with the given parser.
+ *
+ * @param parser the parser whose errors we want to get the size of
+ * @returns the number of errors associated with the given parser
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_parser_errors_size(const pm_parser_t *parser) PRISM_NONNULL(1);
 
-    /** The type of comment that we've found. */
-    pm_comment_type_t type;
-} pm_comment_t;
+/**
+ * Returns the number of warnings associated with the given parser.
+ *
+ * @param parser the parser whose warnings we want to get the size of
+ * @returns the number of warnings associated with the given parser
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_parser_warnings_size(const pm_parser_t *parser) PRISM_NONNULL(1);
 
 /**
- * This is a node in the linked list of magic comments that we've found while
+ * A callback function that can be used to process diagnostics found while
  * parsing.
+ */
+typedef void (*pm_diagnostic_callback_t)(const pm_diagnostic_t *diagnostic, void *data);
+
+/**
+ * Iterates over the errors associated with the given parser and calls the
+ * given callback for each error.
  *
- * @extends pm_list_node_t
+ * @param parser the parser whose errors we want to iterate over
+ * @param callback the callback function to call for each error. This function
+ *     will be passed a pointer to the error and the data parameter passed to
+ *     this function.
+ * @param data the data to pass to the callback function for each error. This
+ *     can be NULL if no data needs to be passed to the callback function.
  */
-typedef struct {
-    /** The embedded base node. */
-    pm_list_node_t node;
+PRISM_EXPORTED_FUNCTION void pm_parser_errors_each(const pm_parser_t *parser, pm_diagnostic_callback_t callback, void *data) PRISM_NONNULL(1);
 
-    /** A pointer to the start of the key in the source. */
-    const uint8_t *key_start;
+/**
+ * Iterates over the warnings associated with the given parser and calls the
+ * given callback for each warning.
+ *
+ * @param parser the parser whose warnings we want to iterate over
+ * @param callback the callback function to call for each warning. This function
+ *     will be passed a pointer to the warning and the data parameter passed to
+ *     this function.
+ * @param data the data to pass to the callback function for each warning. This
+ *     can be NULL if no data needs to be passed to the callback function.
+ */
+PRISM_EXPORTED_FUNCTION void pm_parser_warnings_each(const pm_parser_t *parser, pm_diagnostic_callback_t callback, void *data) PRISM_NONNULL(1);
 
-    /** A pointer to the start of the value in the source. */
-    const uint8_t *value_start;
+/**
+ * Returns the number of constants in the constant pool associated with the
+ * given parser.
+ *
+ * @param parser the parser whose constant pool constants we want to get the
+ *     size of
+ * @returns the number of constants in the constant pool associated with the
+ *     given parser
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_parser_constants_size(const pm_parser_t *parser) PRISM_NONNULL(1);
 
-    /** The length of the key in the source. */
-    uint32_t key_length;
+/**
+ * A callback function that can be used to process constants found while
+ * parsing.
+ */
+typedef void (*pm_constant_callback_t)(const pm_constant_t *constant, void *data);
 
-    /** The length of the value in the source. */
-    uint32_t value_length;
-} pm_magic_comment_t;
+/**
+ * Iterates over the constants in the constant pool associated with the given
+ * parser and calls the given callback for each constant.
+ *
+ * @param parser the parser whose constants we want to iterate over
+ * @param callback the callback function to call for each constant. This function
+ *     will be passed a pointer to the constant and the data parameter passed to
+ *     this function.
+ * @param data the data to pass to the callback function for each constant. This
+ *     can be NULL if no data needs to be passed to the callback function.
+ */
+PRISM_EXPORTED_FUNCTION void pm_parser_constants_each(const pm_parser_t *parser, pm_constant_callback_t callback, void *data) PRISM_NONNULL(1);
 
 /**
- * When the encoding that is being used to parse the source is changed by prism,
- * we provide the ability here to call out to a user-defined function.
+ * Returns a pointer to the constant at the given id in the constant pool
+ * associated with the given parser.
+ *
+ * @param parser the parser whose constant pool we want to look up from
+ * @param constant_id the id of the constant to look up (1-based)
+ * @returns a pointer to the constant at the given id
  */
-typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
+PRISM_EXPORTED_FUNCTION const pm_constant_t * pm_parser_constant(const pm_parser_t *parser, pm_constant_id_t constant_id) PRISM_NONNULL(1);
 
 /**
- * When you are lexing through a file, the lexer needs all of the information
- * that the parser additionally provides (for example, the local table). So if
- * you want to properly lex Ruby, you need to actually lex it in the context of
- * the parser. In order to provide this functionality, we optionally allow a
- * struct to be attached to the parser that calls back out to a user-provided
- * callback when each token is lexed.
- */
-typedef struct {
-    /**
-     * This opaque pointer is used to provide whatever information the user
-     * deemed necessary to the callback. In our case we use it to pass the array
-     * that the tokens get appended into.
-     */
-    void *data;
-
-    /**
-     * This is the callback that is called when a token is lexed. It is passed
-     * the opaque data pointer, the parser, and the token that was lexed.
-     */
-    void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token);
-} pm_lex_callback_t;
-
-/** The type of shareable constant value that can be set. */
-typedef uint8_t pm_shareable_constant_value_t;
-static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_NONE = 0x0;
-static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL;
-static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING;
-static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY;
-
-/**
- * This tracks an individual local variable in a certain lexical context, as
- * well as the number of times is it read.
- */
-typedef struct {
-    /** The name of the local variable. */
-    pm_constant_id_t name;
-
-    /** The location of the local variable in the source. */
-    pm_location_t location;
-
-    /** The index of the local variable in the local table. */
-    uint32_t index;
-
-    /** The number of times the local variable is read. */
-    uint32_t reads;
-
-    /** The hash of the local variable. */
-    uint32_t hash;
-} pm_local_t;
-
-/**
- * This is a set of local variables in a certain lexical context (method, class,
- * module, etc.). We need to track how many times these variables are read in
- * order to warn if they only get written.
- */
-typedef struct pm_locals {
-    /** The number of local variables in the set. */
-    uint32_t size;
-
-    /** The capacity of the local variables set. */
-    uint32_t capacity;
-
-    /** The nullable allocated memory for the local variables in the set. */
-    pm_local_t *locals;
-} pm_locals_t;
-
-/** The flags about scope parameters that can be set. */
-typedef uint8_t pm_scope_parameters_t;
-static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NONE = 0x0;
-static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x1;
-static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x2;
-static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x4;
-static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x8;
-static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED = 0x10;
-static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_INNER = 0x20;
-static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_FOUND = 0x40;
-
-/**
- * This struct represents a node in a linked list of scopes. Some scopes can see
- * into their parent scopes, while others cannot.
- */
-typedef struct pm_scope {
-    /** A pointer to the previous scope in the linked list. */
-    struct pm_scope *previous;
-
-    /** The IDs of the locals in the given scope. */
-    pm_locals_t locals;
-
-    /**
-     * This is a list of the implicit parameters contained within the block.
-     * These will be processed after the block is parsed to determine the kind
-     * of parameters node that should be used and to check if any errors need to
-     * be added.
-     */
-    pm_node_list_t implicit_parameters;
-
-    /**
-     * This is a bitfield that indicates the parameters that are being used in
-     * this scope. It is a combination of the PM_SCOPE_PARAMETERS_* constants.
-     * There are three different kinds of parameters that can be used in a
-     * scope:
-     *
-     * - Ordinary parameters (e.g., def foo(bar); end)
-     * - Numbered parameters (e.g., def foo; _1; end)
-     * - The it parameter (e.g., def foo; it; end)
-     *
-     * If ordinary parameters are being used, then certain parameters can be
-     * forwarded to another method/structure. Those are indicated by four
-     * additional bits in the params field. For example, some combinations of:
-     *
-     * - def foo(*); end
-     * - def foo(**); end
-     * - def foo(&); end
-     * - def foo(...); end
-     */
-    pm_scope_parameters_t parameters;
-
-    /**
-     * The current state of constant shareability for this scope. This is
-     * changed by magic shareable_constant_value comments.
-     */
-    pm_shareable_constant_value_t shareable_constant;
-
-    /**
-     * A boolean indicating whether or not this scope can see into its parent.
-     * If closed is true, then the scope cannot see into its parent.
-     */
-    bool closed;
-} pm_scope_t;
-
-/**
- * A struct that represents a stack of boolean values.
- */
-typedef uint32_t pm_state_stack_t;
-
-/**
- * This struct represents the overall parser. It contains a reference to the
- * source file, as well as pointers that indicate where in the source it's
- * currently parsing. It also contains the most recent and current token that
- * it's considering.
- */
-struct pm_parser {
-    /**
-     * The next node identifier that will be assigned. This is a unique
-     * identifier used to track nodes such that the syntax tree can be dropped
-     * but the node can be found through another parse.
-     */
-    uint32_t node_id;
-
-    /** The current state of the lexer. */
-    pm_lex_state_t lex_state;
-
-    /** Tracks the current nesting of (), [], and {}. */
-    int enclosure_nesting;
-
-    /**
-     * Used to temporarily track the nesting of enclosures to determine if a {
-     * is the beginning of a lambda following the parameters of a lambda.
-     */
-    int lambda_enclosure_nesting;
-
-    /**
-     * Used to track the nesting of braces to ensure we get the correct value
-     * when we are interpolating blocks with braces.
-     */
-    int brace_nesting;
-
-    /**
-     * The stack used to determine if a do keyword belongs to the predicate of a
-     * while, until, or for loop.
-     */
-    pm_state_stack_t do_loop_stack;
-
-    /**
-     * The stack used to determine if a do keyword belongs to the beginning of a
-     * block.
-     */
-    pm_state_stack_t accepts_block_stack;
-
-    /** A stack of lex modes. */
-    struct {
-        /** The current mode of the lexer. */
-        pm_lex_mode_t *current;
-
-        /** The stack of lexer modes. */
-        pm_lex_mode_t stack[PM_LEX_STACK_SIZE];
-
-        /** The current index into the lexer mode stack. */
-        size_t index;
-    } lex_modes;
-
-    /** The pointer to the start of the source. */
-    const uint8_t *start;
-
-    /** The pointer to the end of the source. */
-    const uint8_t *end;
-
-    /** The previous token we were considering. */
-    pm_token_t previous;
-
-    /** The current token we're considering. */
-    pm_token_t current;
-
-    /**
-     * This is a special field set on the parser when we need the parser to jump
-     * to a specific location when lexing the next token, as opposed to just
-     * using the end of the previous token. Normally this is NULL.
-     */
-    const uint8_t *next_start;
-
-    /**
-     * This field indicates the end of a heredoc whose identifier was found on
-     * the current line. If another heredoc is found on the same line, then this
-     * will be moved forward to the end of that heredoc. If no heredocs are
-     * found on a line then this is NULL.
-     */
-    const uint8_t *heredoc_end;
-
-    /** The list of comments that have been found while parsing. */
-    pm_list_t comment_list;
-
-    /** The list of magic comments that have been found while parsing. */
-    pm_list_t magic_comment_list;
-
-    /**
-     * An optional location that represents the location of the __END__ marker
-     * and the rest of the content of the file. This content is loaded into the
-     * DATA constant when the file being parsed is the main file being executed.
-     */
-    pm_location_t data_loc;
-
-    /** The list of warnings that have been found while parsing. */
-    pm_list_t warning_list;
-
-    /** The list of errors that have been found while parsing. */
-    pm_list_t error_list;
-
-    /** The current local scope. */
-    pm_scope_t *current_scope;
-
-    /** The current parsing context. */
-    pm_context_node_t *current_context;
-
-    /**
-     * The hash keys for the hash that is currently being parsed. This is not
-     * usually necessary because it can pass it down the various call chains,
-     * but in the event that you're parsing a hash that is being directly
-     * pushed into another hash with **, we need to share the hash keys so that
-     * we can warn for the nested hash as well.
-     */
-    pm_static_literals_t *current_hash_keys;
-
-    /**
-     * The encoding functions for the current file is attached to the parser as
-     * it's parsing so that it can change with a magic comment.
-     */
-    const pm_encoding_t *encoding;
-
-    /**
-     * When the encoding that is being used to parse the source is changed by
-     * prism, we provide the ability here to call out to a user-defined
-     * function.
-     */
-    pm_encoding_changed_callback_t encoding_changed_callback;
-
-    /**
-     * This pointer indicates where a comment must start if it is to be
-     * considered an encoding comment.
-     */
-    const uint8_t *encoding_comment_start;
-
-    /**
-     * This is an optional callback that can be attached to the parser that will
-     * be called whenever a new token is lexed by the parser.
-     */
-    pm_lex_callback_t *lex_callback;
-
-    /**
-     * This is the path of the file being parsed. We use the filepath when
-     * constructing SourceFileNodes.
-     */
-    pm_string_t filepath;
-
-    /**
-     * This constant pool keeps all of the constants defined throughout the file
-     * so that we can reference them later.
-     */
-    pm_constant_pool_t constant_pool;
-
-    /** This is the list of newline offsets in the source file. */
-    pm_newline_list_t newline_list;
-
-    /**
-     * We want to add a flag to integer nodes that indicates their base. We only
-     * want to parse these once, but we don't have space on the token itself to
-     * communicate this information. So we store it here and pass it through
-     * when we find tokens that we need it for.
-     */
-    pm_node_flags_t integer_base;
-
-    /**
-     * This string is used to pass information from the lexer to the parser. It
-     * is particularly necessary because of escape sequences.
-     */
-    pm_string_t current_string;
-
-    /**
-     * The line number at the start of the parse. This will be used to offset
-     * the line numbers of all of the locations.
-     */
-    int32_t start_line;
-
-    /**
-     * When a string-like expression is being lexed, any byte or escape sequence
-     * that resolves to a value whose top bit is set (i.e., >= 0x80) will
-     * explicitly set the encoding to the same encoding as the source.
-     * Alternatively, if a unicode escape sequence is used (e.g., \\u{80}) that
-     * resolves to a value whose top bit is set, then the encoding will be
-     * explicitly set to UTF-8.
-     *
-     * The _next_ time this happens, if the encoding that is about to become the
-     * explicitly set encoding does not match the previously set explicit
-     * encoding, a mixed encoding error will be emitted.
-     *
-     * When the expression is finished being lexed, the explicit encoding
-     * controls the encoding of the expression. For the most part this means
-     * that the expression will either be encoded in the source encoding or
-     * UTF-8. This holds for all encodings except US-ASCII. If the source is
-     * US-ASCII and an explicit encoding was set that was _not_ UTF-8, then the
-     * expression will be encoded as ASCII-8BIT.
-     *
-     * Note that if the expression is a list, different elements within the same
-     * list can have different encodings, so this will get reset between each
-     * element. Furthermore all of this only applies to lists that support
-     * interpolation, because otherwise escapes that could change the encoding
-     * are ignored.
-     *
-     * At first glance, it may make more sense for this to live on the lexer
-     * mode, but we need it here to communicate back to the parser for character
-     * literals that do not push a new lexer mode.
-     */
-    const pm_encoding_t *explicit_encoding;
-
-    /**
-     * When parsing block exits (e.g., break, next, redo), we need to validate
-     * that they are in correct contexts. For the most part we can do this by
-     * looking at our parent contexts. However, modifier while and until
-     * expressions can change that context to make block exits valid. In these
-     * cases, we need to keep track of the block exits and then validate them
-     * after the expression has been parsed.
-     *
-     * We use a pointer here because we don't want to keep a whole list attached
-     * since this will only be used in the context of begin/end expressions.
-     */
-    pm_node_list_t *current_block_exits;
-
-    /** The version of prism that we should use to parse. */
-    pm_options_version_t version;
-
-    /** The command line flags given from the options. */
-    uint8_t command_line;
-
-    /**
-     * Whether or not we have found a frozen_string_literal magic comment with
-     * a true or false value.
-     * May be:
-     *  - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
-     *  - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
-     *  - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
-     */
-    int8_t frozen_string_literal;
-
-    /**
-     * Whether or not we are parsing an eval string. This impacts whether or not
-     * we should evaluate if block exits/yields are valid.
-     */
-    bool parsing_eval;
-
-    /**
-     * Whether or not we are parsing a "partial" script, which is a script that
-     * will be evaluated in the context of another script, so we should not
-     * check jumps (next/break/etc.) for validity.
-     */
-    bool partial_script;
-
-    /** Whether or not we're at the beginning of a command. */
-    bool command_start;
-
-    /** Whether or not we're currently recovering from a syntax error. */
-    bool recovering;
-
-    /**
-     * This is very specialized behavior for when you want to parse in a context
-     * that does not respect encoding comments. Its main use case is translating
-     * into the whitequark/parser AST which re-encodes source files in UTF-8
-     * before they are parsed and ignores encoding comments.
-     */
-    bool encoding_locked;
-
-    /**
-     * Whether or not the encoding has been changed by a magic comment. We use
-     * this to provide a fast path for the lexer instead of going through the
-     * function pointer.
-     */
-    bool encoding_changed;
-
-    /**
-     * This flag indicates that we are currently parsing a pattern matching
-     * expression and impacts that calculation of newlines.
-     */
-    bool pattern_matching_newlines;
-
-    /** This flag indicates that we are currently parsing a keyword argument. */
-    bool in_keyword_arg;
-
-    /**
-     * Whether or not the parser has seen a token that has semantic meaning
-     * (i.e., a token that is not a comment or whitespace).
-     */
-    bool semantic_token_seen;
-
-    /**
-     * True if the current regular expression being lexed contains only ASCII
-     * characters.
-     */
-    bool current_regular_expression_ascii_only;
-
-    /**
-     * By default, Ruby always warns about mismatched indentation. This can be
-     * toggled with a magic comment.
-     */
-    bool warn_mismatched_indentation;
-};
+ * Parse the source associated with the given parser.
+ *
+ * @param parser The parser to use.
+ * @returns The AST representing the source.
+ */
+PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser) PRISM_NONNULL(1);
 
 #endif
diff --git a/prism/prettyprint.h b/prism/prettyprint.h
index 5a52b2b6b8..0d8e416341 100644
--- a/prism/prettyprint.h
+++ b/prism/prettyprint.h
@@ -6,19 +6,16 @@
 #ifndef PRISM_PRETTYPRINT_H
 #define PRISM_PRETTYPRINT_H
 
-#include "prism/defines.h"
+#include "prism/excludes.h"
 
-#ifdef PRISM_EXCLUDE_PRETTYPRINT
+#ifndef PRISM_EXCLUDE_PRETTYPRINT
 
-void pm_prettyprint(void);
-
-#else
-
-#include <stdio.h>
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nonnull.h"
 
 #include "prism/ast.h"
+#include "prism/buffer.h"
 #include "prism/parser.h"
-#include "prism/util/pm_buffer.h"
 
 /**
  * Pretty-prints the AST represented by the given node to the given buffer.
@@ -27,7 +24,7 @@ void pm_prettyprint(void);
  * @param parser The parser that parsed the AST.
  * @param node The root node of the AST to pretty-print.
  */
-PRISM_EXPORTED_FUNCTION void pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node);
+PRISM_EXPORTED_FUNCTION void pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node) PRISM_NONNULL(1, 2, 3);
 
 #endif
 
diff --git a/prism/prism.c b/prism/prism.c
index cc634b59e3..a8bbcea097 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -1,4 +1,90 @@
-#include "prism.h"
+#include "prism/compiler/accel.h"
+#include "prism/compiler/fallthrough.h"
+#include "prism/compiler/unused.h"
+
+#include "prism/internal/allocator.h"
+#include "prism/internal/arena.h"
+#include "prism/internal/bit.h"
+#include "prism/internal/buffer.h"
+#include "prism/internal/char.h"
+#include "prism/internal/comments.h"
+#include "prism/internal/constant_pool.h"
+#include "prism/internal/diagnostic.h"
+#include "prism/internal/encoding.h"
+#include "prism/internal/integer.h"
+#include "prism/internal/isinf.h"
+#include "prism/internal/line_offset_list.h"
+#include "prism/internal/list.h"
+#include "prism/internal/magic_comments.h"
+#include "prism/internal/memchr.h"
+#include "prism/internal/node.h"
+#include "prism/internal/options.h"
+#include "prism/internal/parser.h"
+#include "prism/internal/regexp.h"
+#include "prism/internal/serialize.h"
+#include "prism/internal/source.h"
+#include "prism/internal/static_literals.h"
+#include "prism/internal/stringy.h"
+#include "prism/internal/strncasecmp.h"
+#include "prism/internal/strpbrk.h"
+#include "prism/internal/tokens.h"
+
+#include "prism/excludes.h"
+#include "prism/serialize.h"
+#include "prism/stream.h"
+#include "prism/version.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <locale.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/**
+ * When we are parsing using recursive descent, we want to protect against
+ * malicious payloads that could attempt to crash our parser. We do this by
+ * specifying a maximum depth to which we are allowed to recurse.
+ */
+#ifndef PRISM_DEPTH_MAXIMUM
+    #define PRISM_DEPTH_MAXIMUM 10000
+#endif
+
+/**
+ * A simple utility macro to concatenate two tokens together, necessary when one
+ * of the tokens is itself a macro.
+ */
+#define PM_CONCATENATE(left, right) left ## right
+
+/**
+ * We want to be able to use static assertions, but they weren't standardized
+ * until C11. As such, we polyfill it here by making a hacky typedef that will
+ * fail to compile due to a negative array size if the condition is false.
+ */
+#if defined(_Static_assert)
+#   define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message)
+#else
+#   define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
+#endif
+
+/**
+ * Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its
+ * branch prediction.
+ */
+#if defined(__GNUC__) || defined(__clang__)
+    /** The compiler should predict that this branch will be taken. */
+    #define PRISM_LIKELY(x) __builtin_expect(!!(x), 1)
+
+    /** The compiler should predict that this branch will not be taken. */
+    #define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0)
+#else
+    /** Void because this platform does not support branch prediction hints. */
+    #define PRISM_LIKELY(x)   (x)
+
+    /** Void because this platform does not support branch prediction hints. */
+    #define PRISM_UNLIKELY(x) (x)
+#endif
 
 /**
  * The prism version and the serialization format.
@@ -19,6 +105,51 @@ pm_version(void) {
 #define MAX(a,b) (((a)>(b))?(a):(b))
 
 /******************************************************************************/
+/* Helpful AST-related macros                                                 */
+/******************************************************************************/
+
+#define U32(value_) ((uint32_t) (value_))
+
+#define FL PM_NODE_FLAGS
+#define UP PM_NODE_UPCAST
+
+#define PM_LOCATION_START(location_) ((location_)->start)
+#define PM_LOCATION_END(location_) ((location_)->start + (location_)->length)
+
+#define PM_TOKEN_START(parser_, token_) U32((token_)->start - (parser_)->start)
+#define PM_TOKEN_END(parser_, token_) U32((token_)->end - (parser_)->start)
+#define PM_TOKEN_LENGTH(token_) U32((token_)->end - (token_)->start)
+#define PM_TOKENS_LENGTH(left_, right_) U32((right_)->end - (left_)->start)
+
+#define PM_NODE_START(node_) (UP(node_)->location.start)
+#define PM_NODE_LENGTH(node_) (UP(node_)->location.length)
+#define PM_NODE_END(node_) (UP(node_)->location.start + UP(node_)->location.length)
+#define PM_NODES_LENGTH(left_, right_) (PM_NODE_END(right_) - PM_NODE_START(left_))
+
+#define PM_TOKEN_NODE_LENGTH(parser_, token_, node_) (PM_NODE_END(node_) - PM_TOKEN_START(parser_, token_))
+#define PM_NODE_TOKEN_LENGTH(parser_, node_, token_) (PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))
+
+#define PM_NODE_START_SET_NODE(left_, right_) (PM_NODE_START(left_) = PM_NODE_START(right_))
+#define PM_NODE_START_SET_TOKEN(parser_, node_, token_) (PM_NODE_START(node_) = PM_TOKEN_START(parser_, token_))
+#define PM_NODE_LENGTH_SET_NODE(left_, right_) (PM_NODE_LENGTH(left_) = PM_NODE_END(right_) - PM_NODE_START(left_))
+#define PM_NODE_LENGTH_SET_TOKEN(parser_, node_, token_) (PM_NODE_LENGTH(node_) = PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))
+#define PM_NODE_LENGTH_SET_LOCATION(node_, location_) (PM_NODE_LENGTH(node_) = PM_LOCATION_END(location_) - PM_NODE_START(node_))
+
+#define PM_LOCATION_INIT(start_, length_) ((pm_location_t) { .start = (start_), .length = (length_) })
+#define PM_LOCATION_INIT_UNSET PM_LOCATION_INIT(0, 0)
+#define PM_LOCATION_INIT_TOKEN(parser_, token_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_))
+#define PM_LOCATION_INIT_NODE(node_) UP(node_)->location
+
+#define PM_LOCATION_INIT_TOKENS(parser_, left_, right_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, left_), PM_TOKENS_LENGTH(left_, right_))
+#define PM_LOCATION_INIT_NODES(left_, right_) PM_LOCATION_INIT(PM_NODE_START(left_), PM_NODES_LENGTH(left_, right_))
+#define PM_LOCATION_INIT_TOKEN_NODE(parser_, token_, node_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_NODE_LENGTH(parser_, token_, node_))
+#define PM_LOCATION_INIT_NODE_TOKEN(parser_, node_, token_) PM_LOCATION_INIT(PM_NODE_START(node_), PM_NODE_TOKEN_LENGTH(parser_, node_, token_))
+
+#define TOK2LOC(parser_, token_) PM_LOCATION_INIT_TOKEN(parser_, token_)
+#define NTOK2LOC(parser_, token_) ((token_) == NULL ? PM_LOCATION_INIT_UNSET : TOK2LOC(parser_, token_))
+#define NTOK2PTR(token_) ((token_).start == NULL ? NULL : &(token_))
+
+/******************************************************************************/
 /* Lex mode manipulations                                                     */
 /******************************************************************************/
 
@@ -26,7 +157,7 @@ pm_version(void) {
  * Returns the incrementor character that should be used to increment the
  * nesting count if one is possible.
  */
-static inline uint8_t
+static PRISM_INLINE uint8_t
 lex_mode_incrementor(const uint8_t start) {
     switch (start) {
         case '(':
@@ -43,7 +174,7 @@ lex_mode_incrementor(const uint8_t start) {
  * Returns the matching character that should be used to terminate a list
  * beginning with the given character.
  */
-static inline uint8_t
+static PRISM_INLINE uint8_t
 lex_mode_terminator(const uint8_t start) {
     switch (start) {
         case '(':
@@ -85,7 +216,7 @@ lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
 /**
  * Push on a new list lex mode.
  */
-static inline bool
+static PRISM_INLINE bool
 lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
     uint8_t incrementor = lex_mode_incrementor(delimiter);
     uint8_t terminator = lex_mode_terminator(delimiter);
@@ -103,7 +234,8 @@ lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
     // These are the places where we need to split up the content of the list.
     // We'll use strpbrk to find the first of these characters.
     uint8_t *breakpoints = lex_mode.as.list.breakpoints;
-    memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
+    memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
+    memcpy(breakpoints, "\\ \t\f\r\v\n", sizeof("\\ \t\f\r\v\n") - 1);
     size_t index = 7;
 
     // Now we'll add the terminator to the list of breakpoints. If the
@@ -132,7 +264,7 @@ lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
  * called when we're at the end of the file. We want the parser to be able to
  * perform its normal error tolerance.
  */
-static inline bool
+static PRISM_INLINE bool
 lex_mode_push_list_eof(pm_parser_t *parser) {
     return lex_mode_push_list(parser, false, '\0');
 }
@@ -140,7 +272,7 @@ lex_mode_push_list_eof(pm_parser_t *parser) {
 /**
  * Push on a new regexp lex mode.
  */
-static inline bool
+static PRISM_INLINE bool
 lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
     pm_lex_mode_t lex_mode = {
         .mode = PM_LEX_REGEXP,
@@ -155,7 +287,8 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato
     // regular expression. We'll use strpbrk to find the first of these
     // characters.
     uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
-    memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
+    memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
+    memcpy(breakpoints, "\r\n\\#", sizeof("\r\n\\#") - 1);
     size_t index = 4;
 
     // First we'll add the terminator.
@@ -175,7 +308,7 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato
 /**
  * Push on a new string lex mode.
  */
-static inline bool
+static PRISM_INLINE bool
 lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
     pm_lex_mode_t lex_mode = {
         .mode = PM_LEX_STRING,
@@ -191,7 +324,8 @@ lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed
     // These are the places where we need to split up the content of the
     // string. We'll use strpbrk to find the first of these characters.
     uint8_t *breakpoints = lex_mode.as.string.breakpoints;
-    memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
+    memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
+    memcpy(breakpoints, "\r\n\\", sizeof("\r\n\\") - 1);
     size_t index = 3;
 
     // Now add in the terminator. If the terminator is not already a NULL byte,
@@ -221,7 +355,7 @@ lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed
  * called when we're at the end of the file. We want the parser to be able to
  * perform its normal error tolerance.
  */
-static inline bool
+static PRISM_INLINE bool
 lex_mode_push_string_eof(pm_parser_t *parser) {
     return lex_mode_push_string(parser, false, false, '\0', '\0');
 }
@@ -241,7 +375,7 @@ lex_mode_pop(pm_parser_t *parser) {
     } else {
         parser->lex_modes.index--;
         pm_lex_mode_t *prev = parser->lex_modes.current->prev;
-        xfree(parser->lex_modes.current);
+        xfree_sized(parser->lex_modes.current, sizeof(pm_lex_mode_t));
         parser->lex_modes.current = prev;
     }
 }
@@ -249,7 +383,7 @@ lex_mode_pop(pm_parser_t *parser) {
 /**
  * This is the equivalent of IS_lex_state is CRuby.
  */
-static inline bool
+static PRISM_INLINE bool
 lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
     return parser->lex_state & state;
 }
@@ -260,7 +394,7 @@ typedef enum {
     PM_IGNORED_NEWLINE_PATTERN
 } pm_ignored_newline_type_t;
 
-static inline pm_ignored_newline_type_t
+static PRISM_INLINE pm_ignored_newline_type_t
 lex_state_ignored_p(pm_parser_t *parser) {
     bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
 
@@ -273,17 +407,17 @@ lex_state_ignored_p(pm_parser_t *parser) {
     }
 }
 
-static inline bool
+static PRISM_INLINE bool
 lex_state_beg_p(pm_parser_t *parser) {
     return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
 }
 
-static inline bool
+static PRISM_INLINE bool
 lex_state_arg_p(pm_parser_t *parser) {
     return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
 }
 
-static inline bool
+static PRISM_INLINE bool
 lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
     if (parser->current.end >= parser->end) {
         return false;
@@ -291,7 +425,7 @@ lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
     return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
 }
 
-static inline bool
+static PRISM_INLINE bool
 lex_state_end_p(pm_parser_t *parser) {
     return lex_state_p(parser, PM_LEX_STATE_END_ANY);
 }
@@ -299,7 +433,7 @@ lex_state_end_p(pm_parser_t *parser) {
 /**
  * This is the equivalent of IS_AFTER_OPERATOR in CRuby.
  */
-static inline bool
+static PRISM_INLINE bool
 lex_state_operator_p(pm_parser_t *parser) {
     return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
 }
@@ -308,7 +442,7 @@ lex_state_operator_p(pm_parser_t *parser) {
  * Set the state of the lexer. This is defined as a function to be able to put a
  * breakpoint in it.
  */
-static inline void
+static PRISM_INLINE void
 lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
     parser->lex_state = state;
 }
@@ -322,7 +456,7 @@ lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
 #endif
 
 #if PM_DEBUG_LOGGING
-PRISM_ATTRIBUTE_UNUSED static void
+PRISM_UNUSED static void
 debug_state(pm_parser_t *parser) {
     fprintf(stderr, "STATE: ");
     bool first = true;
@@ -403,140 +537,134 @@ debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * call
 /**
  * Append an error to the list of errors on the parser.
  */
-static inline void
-pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
-    pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
+static PRISM_INLINE void
+pm_parser_err(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
+    pm_diagnostic_list_append(&parser->metadata_arena, &parser->error_list, start, length, diag_id);
 }
 
 /**
- * Append an error to the list of errors on the parser using a format string.
+ * Append an error to the list of errors on the parser using the location of the
+ * given token.
  */
-#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
-    pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__)
+static PRISM_INLINE void
+pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
+    pm_parser_err(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
+}
 
 /**
  * Append an error to the list of errors on the parser using the location of the
  * current token.
  */
-static inline void
+static PRISM_INLINE void
 pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
-    pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
+    pm_parser_err_token(parser, &parser->current, diag_id);
 }
 
 /**
- * Append an error to the list of errors on the parser using the given location
- * using a format string.
+ * Append an error to the list of errors on the parser using the location of the
+ * previous token.
  */
-#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
-    PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__)
+static PRISM_INLINE void
+pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
+    pm_parser_err_token(parser, &parser->previous, diag_id);
+}
 
 /**
  * Append an error to the list of errors on the parser using the location of the
  * given node.
  */
-static inline void
+static PRISM_INLINE void
 pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
-    pm_parser_err(parser, node->location.start, node->location.end, diag_id);
+    pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
 }
 
 /**
- * Append an error to the list of errors on the parser using the location of the
- * given node and a format string.
- */
-#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
-    PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
-
-/**
- * Append an error to the list of errors on the parser using the location of the
- * given node and a format string, and add on the content of the node.
+ * Append an error to the list of errors on the parser using a format string.
  */
-#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
-    PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
+#define PM_PARSER_ERR_FORMAT(parser_, start_, length_, diag_id_, ...) \
+    pm_diagnostic_list_append_format(&(parser_)->metadata_arena, &(parser_)->error_list, start_, length_, diag_id_, __VA_ARGS__)
 
 /**
  * Append an error to the list of errors on the parser using the location of the
- * previous token.
+ * given node and a format string.
  */
-static inline void
-pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
-    pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
-}
+#define PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, ...) \
+    PM_PARSER_ERR_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)
 
 /**
  * Append an error to the list of errors on the parser using the location of the
- * given token.
+ * given node and a format string, and add on the content of the node.
  */
-static inline void
-pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
-    pm_parser_err(parser, token->start, token->end, diag_id);
-}
+#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser_, node_, diag_id_) \
+    PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, (int) PM_NODE_LENGTH(node_), (const char *) ((parser_)->start + PM_NODE_START(node_)))
 
 /**
  * Append an error to the list of errors on the parser using the location of the
  * given token and a format string.
  */
-#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
-    PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
+#define PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id, ...) \
+    PM_PARSER_ERR_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id, __VA_ARGS__)
 
 /**
  * Append an error to the list of errors on the parser using the location of the
  * given token and a format string, and add on the content of the token.
  */
-#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
-    PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
+#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
+    PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)
 
 /**
  * Append a warning to the list of warnings on the parser.
  */
-static inline void
-pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
-    pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
+static PRISM_INLINE void
+pm_parser_warn(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
+    pm_diagnostic_list_append(&parser->metadata_arena, &parser->warning_list, start, length, diag_id);
 }
 
 /**
  * Append a warning to the list of warnings on the parser using the location of
  * the given token.
  */
-static inline void
+static PRISM_INLINE void
 pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
-    pm_parser_warn(parser, token->start, token->end, diag_id);
+    pm_parser_warn(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
 }
 
 /**
  * Append a warning to the list of warnings on the parser using the location of
  * the given node.
  */
-static inline void
+static PRISM_INLINE void
 pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
-    pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
+    pm_parser_warn(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
 }
 
 /**
- * Append a warning to the list of warnings on the parser using a format string.
+ * Append a warning to the list of warnings on the parser using a format string
+ * and the given location.
  */
-#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
-    pm_diagnostic_list_append_format(&parser->warning_list, start, end, diag_id, __VA_ARGS__)
+#define PM_PARSER_WARN_FORMAT(parser_, start_, length_, diag_id_, ...) \
+    pm_diagnostic_list_append_format(&(parser_)->metadata_arena, &(parser_)->warning_list, start_, length_, diag_id_, __VA_ARGS__)
 
 /**
  * Append a warning to the list of warnings on the parser using the location of
  * the given token and a format string.
  */
-#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
-    PM_PARSER_WARN_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
+#define PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, ...) \
+    PM_PARSER_WARN_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id_, __VA_ARGS__)
 
 /**
  * Append a warning to the list of warnings on the parser using the location of
  * the given token and a format string, and add on the content of the token.
  */
-#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
-    PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
+#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
+    PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)
 
 /**
  * Append a warning to the list of warnings on the parser using the location of
  * the given node and a format string.
  */
-#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
-    PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
+#define PM_PARSER_WARN_NODE_FORMAT(parser_, node_, diag_id_, ...) \
+    PM_PARSER_WARN_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)
 
 /**
  * Add an error for an expected heredoc terminator. This is a special function
@@ -547,8 +675,8 @@ static void
 pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
     PM_PARSER_ERR_FORMAT(
         parser,
-        ident_start,
-        ident_start + ident_length,
+        U32(ident_start - parser->start),
+        U32(ident_length),
         PM_ERR_HEREDOC_TERM,
         (int) ident_length,
         (const char *) ident_start
@@ -708,7 +836,7 @@ pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t
 /**
  * Get the current state of constant shareability.
  */
-static inline pm_shareable_constant_value_t
+static PRISM_INLINE pm_shareable_constant_value_t
 pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
     return parser->current_scope->shareable_constant;
 }
@@ -733,12 +861,12 @@ pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constan
 /**
  * The point at which the set of locals switches from being a list to a hash.
  */
-#define PM_LOCALS_HASH_THRESHOLD 9
+#define PM_LOCALS_HASH_THRESHOLD 5
 
 static void
 pm_locals_free(pm_locals_t *locals) {
     if (locals->capacity > 0) {
-        xfree(locals->locals);
+        xfree_sized(locals->locals, locals->capacity * sizeof(pm_local_t));
     }
 }
 
@@ -810,11 +938,13 @@ pm_locals_resize(pm_locals_t *locals) {
  * @return True if the local was added, and false if the local already exists.
  */
 static bool
-pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
+pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, uint32_t start, uint32_t length, uint32_t reads) {
     if (locals->size >= (locals->capacity / 4 * 3)) {
         pm_locals_resize(locals);
     }
 
+    locals->bloom |= (1u << (name & 31));
+
     if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
         for (uint32_t index = 0; index < locals->capacity; index++) {
             pm_local_t *local = &locals->locals[index];
@@ -822,7 +952,7 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start
             if (local->name == PM_CONSTANT_ID_UNSET) {
                 *local = (pm_local_t) {
                     .name = name,
-                    .location = { .start = start, .end = end },
+                    .location = { .start = start, .length = length },
                     .index = locals->size++,
                     .reads = reads,
                     .hash = 0
@@ -843,7 +973,7 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start
             if (local->name == PM_CONSTANT_ID_UNSET) {
                 *local = (pm_local_t) {
                     .name = name,
-                    .location = { .start = start, .end = end },
+                    .location = { .start = start, .length = length },
                     .index = locals->size++,
                     .reads = reads,
                     .hash = initial_hash
@@ -867,6 +997,8 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start
  */
 static uint32_t
 pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
+    if (!(locals->bloom & (1u << (name & 31)))) return UINT32_MAX;
+
     if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
         for (uint32_t index = 0; index < locals->size; index++) {
             pm_local_t *local = &locals->locals[index];
@@ -943,8 +1075,8 @@ pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
  * written but not read in certain contexts.
  */
 static void
-pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
-    pm_constant_id_list_init_capacity(list, locals->size);
+pm_locals_order(pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
+    pm_constant_id_list_init_capacity(parser->arena, list, locals->size);
 
     // If we're still below the threshold for switching to a hash, then we only
     // need to loop over the locals until we hit the size because the locals are
@@ -961,14 +1093,14 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals,
         if (local->name != PM_CONSTANT_ID_UNSET) {
             pm_constant_id_list_insert(list, (size_t) local->index, local->name);
 
-            if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
+            if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_line_offset_list_line(&parser->line_offsets, local->location.start, parser->start_line) >= 0))) {
                 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
 
                 if (constant->length >= 1 && *constant->start != '_') {
                     PM_PARSER_WARN_FORMAT(
                         parser,
                         local->location.start,
-                        local->location.end,
+                        local->location.length,
                         PM_WARN_UNUSED_LOCAL_VARIABLE,
                         (int) constant->length,
                         (const char *) constant->start
@@ -986,43 +1118,53 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals,
 /**
  * Retrieve the constant pool id for the given location.
  */
-static inline pm_constant_id_t
-pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
-    return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
+static PRISM_INLINE pm_constant_id_t
+pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+    /* Fast path: if this is the same token as the last lookup (same pointer
+     * range), return the cached result. */
+    if (start == parser->constant_cache.start && end == parser->constant_cache.end) {
+        return parser->constant_cache.id;
+    }
+
+    pm_constant_id_t id = pm_constant_pool_insert_shared(&parser->metadata_arena, &parser->constant_pool, start, (size_t) (end - start));
+
+    parser->constant_cache.start = start;
+    parser->constant_cache.end = end;
+    parser->constant_cache.id = id;
+
+    return id;
 }
 
 /**
  * Retrieve the constant pool id for the given string.
  */
-static inline pm_constant_id_t
+static PRISM_INLINE pm_constant_id_t
 pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
-    return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
+    return pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, start, length);
 }
 
 /**
  * Retrieve the constant pool id for the given static literal C string.
  */
-static inline pm_constant_id_t
+static PRISM_INLINE pm_constant_id_t
 pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
-    return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
+    return pm_constant_pool_insert_constant(&parser->metadata_arena, &parser->constant_pool, (const uint8_t *) start, length);
 }
 
 /**
  * Retrieve the constant pool id for the given token.
  */
-static inline pm_constant_id_t
+static PRISM_INLINE pm_constant_id_t
 pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
-    return pm_parser_constant_id_location(parser, token->start, token->end);
+    return pm_parser_constant_id_raw(parser, token->start, token->end);
 }
 
 /**
- * Retrieve the constant pool id for the given token. If the token is not
- * provided, then return 0.
+ * This macro allows you to define a case statement for all of the nodes that
+ * may result in a void value.
  */
-static inline pm_constant_id_t
-pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
-    return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
-}
+#define PM_CASE_VOID_VALUE PM_RETURN_NODE: case PM_BREAK_NODE: case PM_NEXT_NODE: \
+    case PM_REDO_NODE: case PM_RETRY_NODE: case PM_MATCH_REQUIRED_NODE
 
 /**
  * Check whether or not the given node is value expression.
@@ -1035,12 +1177,7 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
 
     while (node != NULL) {
         switch (PM_NODE_TYPE(node)) {
-            case PM_RETURN_NODE:
-            case PM_BREAK_NODE:
-            case PM_NEXT_NODE:
-            case PM_REDO_NODE:
-            case PM_RETRY_NODE:
-            case PM_MATCH_REQUIRED_NODE:
+            case PM_CASE_VOID_VALUE:
                 return void_node != NULL ? void_node : node;
             case PM_MATCH_PREDICATE_NODE:
                 return NULL;
@@ -1049,57 +1186,128 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
 
                 if (cast->ensure_clause != NULL) {
                     if (cast->rescue_clause != NULL) {
-                        pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->rescue_clause);
+                        pm_node_t *vn = pm_check_value_expression(parser, UP(cast->rescue_clause));
                         if (vn != NULL) return vn;
                     }
 
                     if (cast->statements != NULL) {
-                        pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
+                        pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
                         if (vn != NULL) return vn;
                     }
 
-                    node = (pm_node_t *) cast->ensure_clause;
+                    node = UP(cast->ensure_clause);
                 } else if (cast->rescue_clause != NULL) {
-                    if (cast->statements == NULL) return NULL;
+                    // https://bugs.ruby-lang.org/issues/21669
+                    if (cast->else_clause == NULL || parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
+                        if (cast->statements == NULL) return NULL;
 
-                    pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
-                    if (vn == NULL) return NULL;
-                    if (void_node == NULL) void_node = vn;
+                        pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
+                        if (vn == NULL) return NULL;
+                        if (void_node == NULL) void_node = vn;
+                    }
 
                     for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
-                        pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) rescue_clause->statements);
+                        pm_node_t *vn = pm_check_value_expression(parser, UP(rescue_clause->statements));
+
                         if (vn == NULL) {
+                            // https://bugs.ruby-lang.org/issues/21669
+                            if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
+                                return NULL;
+                            }
                             void_node = NULL;
                             break;
                         }
-                        if (void_node == NULL) {
-                            void_node = vn;
-                        }
                     }
 
                     if (cast->else_clause != NULL) {
-                        node = (pm_node_t *) cast->else_clause;
+                        node = UP(cast->else_clause);
+
+                        // https://bugs.ruby-lang.org/issues/21669
+                        if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
+                            pm_node_t *vn = pm_check_value_expression(parser, node);
+                            if (vn != NULL) return vn;
+                        }
                     } else {
                         return void_node;
                     }
                 } else {
-                    node = (pm_node_t *) cast->statements;
+                    node = UP(cast->statements);
                 }
 
                 break;
             }
+            case PM_CASE_NODE: {
+                // https://bugs.ruby-lang.org/issues/21669
+                if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
+                    return NULL;
+                }
+
+                pm_case_node_t *cast = (pm_case_node_t *) node;
+                if (cast->else_clause == NULL) return NULL;
+
+                pm_node_t *condition;
+                PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
+                    assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
+
+                    pm_when_node_t *cast = (pm_when_node_t *) condition;
+                    pm_node_t  *vn = pm_check_value_expression(parser, UP(cast->statements));
+                    if (vn == NULL) return NULL;
+                    if (void_node == NULL) void_node = vn;
+                }
+
+                node = UP(cast->else_clause);
+                break;
+            }
+            case PM_CASE_MATCH_NODE: {
+                // https://bugs.ruby-lang.org/issues/21669
+                if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
+                    return NULL;
+                }
+
+                pm_case_match_node_t *cast = (pm_case_match_node_t *) node;
+                if (cast->else_clause == NULL) return NULL;
+
+                pm_node_t *condition;
+                PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
+                    assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
+
+                    pm_in_node_t *cast = (pm_in_node_t *) condition;
+                    pm_node_t  *vn = pm_check_value_expression(parser, UP(cast->statements));
+                    if (vn == NULL) return NULL;
+                    if (void_node == NULL) void_node = vn;
+                }
+
+                node = UP(cast->else_clause);
+                break;
+            }
             case PM_ENSURE_NODE: {
                 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
-                node = (pm_node_t *) cast->statements;
+                node = UP(cast->statements);
                 break;
             }
             case PM_PARENTHESES_NODE: {
                 pm_parentheses_node_t *cast = (pm_parentheses_node_t *) node;
-                node = (pm_node_t *) cast->body;
+                node = UP(cast->body);
                 break;
             }
             case PM_STATEMENTS_NODE: {
                 pm_statements_node_t *cast = (pm_statements_node_t *) node;
+
+                // https://bugs.ruby-lang.org/issues/21669
+                if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
+                    pm_node_t *body_part;
+                    PM_NODE_LIST_FOREACH(&cast->body, index, body_part) {
+                        switch (PM_NODE_TYPE(body_part)) {
+                            case PM_CASE_VOID_VALUE:
+                                if (void_node == NULL) {
+                                    void_node = body_part;
+                                }
+                                return void_node;
+                            default: break;
+                        }
+                    }
+                }
+
                 node = cast->body.nodes[cast->body.size - 1];
                 break;
             }
@@ -1108,7 +1316,7 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
                 if (cast->statements == NULL || cast->subsequent == NULL) {
                     return NULL;
                 }
-                pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
+                pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
                 if (vn == NULL) {
                     return NULL;
                 }
@@ -1123,19 +1331,19 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
                 if (cast->statements == NULL || cast->else_clause == NULL) {
                     return NULL;
                 }
-                pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
+                pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
                 if (vn == NULL) {
                     return NULL;
                 }
                 if (void_node == NULL) {
                     void_node = vn;
                 }
-                node = (pm_node_t *) cast->else_clause;
+                node = UP(cast->else_clause);
                 break;
             }
             case PM_ELSE_NODE: {
                 pm_else_node_t *cast = (pm_else_node_t *) node;
-                node = (pm_node_t *) cast->statements;
+                node = UP(cast->statements);
                 break;
             }
             case PM_AND_NODE: {
@@ -1165,7 +1373,7 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
     return NULL;
 }
 
-static inline void
+static PRISM_INLINE void
 pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
     pm_node_t *void_node = pm_check_value_expression(parser, node);
     if (void_node != NULL) {
@@ -1193,7 +1401,7 @@ pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
             break;
         case PM_CALL_NODE: {
             const pm_call_node_t *cast = (const pm_call_node_t *) node;
-            if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
+            if (cast->call_operator_loc.length > 0 || cast->message_loc.length == 0) break;
 
             const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
             switch (message->length) {
@@ -1406,10 +1614,10 @@ pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
  * Add a warning to the parser if the value that is being written inside of a
  * predicate to a conditional is a literal.
  */
-static inline void
+static PRISM_INLINE void
 pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
     if (pm_conditional_predicate_warn_write_literal_p(node)) {
-        pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
+        pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
     }
 }
 
@@ -1547,26 +1755,6 @@ pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_pr
 }
 
 /**
- * In a lot of places in the tree you can have tokens that are not provided but
- * that do not cause an error. For example, this happens in a method call
- * without parentheses. In these cases we set the token to the "not provided" type.
- * For example:
- *
- *     pm_token_t token = not_provided(parser);
- */
-static inline pm_token_t
-not_provided(pm_parser_t *parser) {
-    return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
-}
-
-#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
-#define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
-#define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
-#define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
-#define PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE ((pm_location_t) { .start = NULL, .end = NULL })
-#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) ((token)->type == PM_TOKEN_NOT_PROVIDED ? PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : PM_LOCATION_TOKEN_VALUE(token))
-
-/**
  * This is a special out parameter to the parse_arguments_list function that
  * includes opening and closing parentheses in addition to the arguments since
  * it's so common. It is handy to use when passing argument information to one
@@ -1592,22 +1780,29 @@ typedef struct {
 /**
  * Retrieve the end location of a `pm_arguments_t` object.
  */
-static inline const uint8_t *
+static PRISM_INLINE const pm_location_t *
 pm_arguments_end(pm_arguments_t *arguments) {
     if (arguments->block != NULL) {
-        const uint8_t *end = arguments->block->location.end;
-        if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
-            end = arguments->closing_loc.end;
+        uint32_t end = PM_NODE_END(arguments->block);
+
+        if (arguments->closing_loc.length > 0) {
+            uint32_t arguments_end = PM_LOCATION_END(&arguments->closing_loc);
+            if (arguments_end > end) {
+                return &arguments->closing_loc;
+            }
         }
-        return end;
+        return &arguments->block->location;
     }
-    if (arguments->closing_loc.start != NULL) {
-        return arguments->closing_loc.end;
+    if (arguments->closing_loc.length > 0) {
+        return &arguments->closing_loc;
     }
     if (arguments->arguments != NULL) {
-        return arguments->arguments->base.location.end;
+        return &arguments->arguments->base.location;
+    }
+    if (arguments->opening_loc.length > 0) {
+        return &arguments->opening_loc;
     }
-    return arguments->closing_loc.end;
+    return NULL;
 }
 
 /**
@@ -1618,7 +1813,7 @@ static void
 pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
     // First, check that we have arguments and that we don't have a closing
     // location for them.
-    if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
+    if (arguments->arguments == NULL || arguments->closing_loc.length > 0) {
         return;
     }
 
@@ -1635,7 +1830,7 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
 
     // If we didn't hit a case before this check, then at this point we need to
     // add a syntax error.
-    pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
+    pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
 }
 
 /******************************************************************************/
@@ -1648,7 +1843,7 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
  * reason we have the encoding_changed boolean to check if we need to go through
  * the function pointer or can just directly use the UTF-8 functions.
  */
-static inline size_t
+static PRISM_INLINE size_t
 char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
     if (n <= 0) return 0;
 
@@ -1675,7 +1870,7 @@ char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t
  * Similar to char_is_identifier but this function assumes that the encoding
  * has not been changed.
  */
-static inline size_t
+static PRISM_INLINE size_t
 char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
     if (n <= 0) {
         return 0;
@@ -1687,11 +1882,189 @@ char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
 }
 
 /**
+ * Scan forward through ASCII identifier characters (a-z, A-Z, 0-9, _) using
+ * wide operations. Returns the number of leading ASCII identifier bytes.
+ * Callers must handle any remaining bytes (short tail or non-ASCII/UTF-8)
+ * with a byte-at-a-time loop.
+ *
+ * At most one of three optimized implementations is selected at compile time,
+ * with a no-op fallback for unsupported platforms:
+ *   1. NEON — processes 16 bytes per iteration on aarch64.
+ *   2. SSSE3 — processes 16 bytes per iteration on x86-64.
+ *   3. SWAR — little-endian fallback, processes 8 bytes per iteration.
+ */
+
+#if defined(PRISM_HAS_NEON)
+#include <arm_neon.h>
+
+static PRISM_INLINE size_t
+scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
+    const uint8_t *cursor = start;
+
+    // Nibble-based lookup tables for classifying [a-zA-Z0-9_].
+    // Each high nibble is assigned a unique bit; the low nibble table
+    // contains the OR of bits for all high nibbles that have an
+    // identifier character at that low nibble position. A byte is an
+    // identifier character iff (low_lut[lo] & high_lut[hi]) != 0.
+    static const uint8_t low_lut_data[16] = {
+        0x15, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F,
+        0x1F, 0x1F, 0x1E, 0x0A, 0x0A, 0x0A, 0x0A, 0x0E
+    };
+    static const uint8_t high_lut_data[16] = {
+        0x00, 0x00, 0x00, 0x01, 0x02, 0x04, 0x08, 0x10,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+    };
+    const uint8x16_t low_lut = vld1q_u8(low_lut_data);
+    const uint8x16_t high_lut = vld1q_u8(high_lut_data);
+    const uint8x16_t mask_0f = vdupq_n_u8(0x0F);
+
+    while (end - cursor >= 16) { // compare lengths: never forms a pointer past end
+        uint8x16_t v = vld1q_u8(cursor);
+
+        uint8x16_t lo_class = vqtbl1q_u8(low_lut, vandq_u8(v, mask_0f));
+        uint8x16_t hi_class = vqtbl1q_u8(high_lut, vshrq_n_u8(v, 4));
+        uint8x16_t ident = vandq_u8(lo_class, hi_class);
+
+        // Fast check: if the per-byte minimum is nonzero, every byte matched.
+        if (vminvq_u8(ident) != 0) {
+            cursor += 16;
+            continue;
+        }
+
+        // Find the first non-identifier byte (zero in ident).
+        uint8x16_t is_zero = vceqq_u8(ident, vdupq_n_u8(0));
+        uint64_t lo = vgetq_lane_u64(vreinterpretq_u64_u8(is_zero), 0);
+
+        if (lo != 0) {
+            cursor += pm_ctzll(lo) / 8;
+        } else {
+            uint64_t hi = vgetq_lane_u64(vreinterpretq_u64_u8(is_zero), 1);
+            cursor += 8 + pm_ctzll(hi) / 8;
+        }
+
+        return (size_t) (cursor - start);
+    }
+
+    return (size_t) (cursor - start);
+}
+
+#elif defined(PRISM_HAS_SSSE3)
+#include <tmmintrin.h>
+
+static PRISM_INLINE size_t
+scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
+    const uint8_t *cursor = start;
+
+    while (end - cursor >= 16) { // compare lengths: never forms a pointer past end
+        __m128i v = _mm_loadu_si128((const __m128i *) cursor);
+        __m128i zero = _mm_setzero_si128();
+
+        // Unsigned range check via saturating subtraction:
+        //   byte >= lo  ⟺  saturate(lo - byte) == 0
+        //   byte <= hi  ⟺  saturate(byte - hi) == 0
+
+        // Fold case: OR with 0x20 maps A-Z to a-z.
+        __m128i lowered = _mm_or_si128(v, _mm_set1_epi8(0x20));
+        __m128i letter = _mm_and_si128(
+            _mm_cmpeq_epi8(_mm_subs_epu8(_mm_set1_epi8(0x61), lowered), zero),
+            _mm_cmpeq_epi8(_mm_subs_epu8(lowered, _mm_set1_epi8(0x7A)), zero));
+
+        __m128i digit = _mm_and_si128(
+            _mm_cmpeq_epi8(_mm_subs_epu8(_mm_set1_epi8(0x30), v), zero),
+            _mm_cmpeq_epi8(_mm_subs_epu8(v, _mm_set1_epi8(0x39)), zero));
+
+        __m128i underscore = _mm_cmpeq_epi8(v, _mm_set1_epi8(0x5F));
+
+        __m128i ident = _mm_or_si128(_mm_or_si128(letter, digit), underscore);
+        int mask = _mm_movemask_epi8(ident);
+
+        if (mask == 0xFFFF) {
+            cursor += 16;
+            continue;
+        }
+
+        cursor += pm_ctzll((uint64_t) (~mask & 0xFFFF));
+        return (size_t) (cursor - start);
+    }
+
+    return (size_t) (cursor - start);
+}
+
+// The SWAR path uses pm_ctzll to find the first non-matching byte within a
+// word, which only yields the correct byte index on little-endian targets.
+// We gate on a positive little-endian check so that unknown-endianness
+// platforms safely fall through to the no-op fallback.
+#elif defined(PRISM_HAS_SWAR)
+
+/**
+ * Little-endian SWAR fallback — processes 8 bytes per iteration.
+ *
+ * The byte-wise range checks avoid cross-byte borrows by pre-setting the high
+ * bit of each byte before subtraction: (byte | 0x80) - lo has a minimum value
+ * of 0x80 - 0x7F = 1, so underflow (and thus a borrow into the next byte) is
+ * impossible. The result has bit 7 set if and only if byte >= lo. The same
+ * reasoning applies to the upper-bound direction.
+ */
+static PRISM_INLINE size_t
+scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
+    static const uint64_t ones = 0x0101010101010101ULL;
+    static const uint64_t highs = 0x8080808080808080ULL;
+    const uint8_t *cursor = start;
+
+    while (end - cursor >= 8) {
+        uint64_t word;
+        memcpy(&word, cursor, 8);
+
+        // Bail on any non-ASCII byte.
+        if (word & highs) break;
+
+        uint64_t digit = ((word | highs) - ones * 0x30) & ((ones * 0x39 | highs) - word) & highs;
+
+        // Fold upper- and lowercase together by forcing bit 5 (OR 0x20),
+        // then check the lowercase range once. Only bytes originally in
+        // [0x41,0x5A] (A-Z) or [0x61,0x7A] (a-z) land in [0x61,0x7A].
+        uint64_t lowered = word | (ones * 0x20);
+        uint64_t letter = ((lowered | highs) - ones * 0x61) & ((ones * 0x7A | highs) - lowered) & highs;
+
+        // Match underscore: XOR with 0x5F maps '_' to 0x00. Every byte is
+        // < 0x80 (ASCII guard above), so highs - xor_us cannot borrow across
+        // bytes and keeps bit 7 only where the byte is 0. The classic
+        // (x - ones) & ~x & highs idiom is not exact per byte: the borrow out
+        // of a zero byte would also flag a following 0x01 byte ('^' after '_').
+        uint64_t xor_us = word ^ (ones * 0x5F);
+        uint64_t underscore = (highs - xor_us) & highs;
+
+        uint64_t ident = digit | letter | underscore;
+
+        if (ident == highs) {
+            cursor += 8;
+            continue;
+        }
+
+        // Find the first non-identifier byte. On little-endian the first
+        // byte sits in the least-significant position.
+        uint64_t not_ident = ~ident & highs;
+        cursor += pm_ctzll(not_ident) / 8;
+        return (size_t) (cursor - start);
+    }
+
+    return (size_t) (cursor - start);
+}
+
+#else
+
+// No-op fallback for big-endian or other unsupported platforms.
+// The caller's byte-at-a-time loop handles everything.
+#define scan_identifier_ascii(start, end) ((size_t) 0)
+
+#endif
+
+/**
  * Like the above, this function is also used extremely frequently to lex all of
  * the identifiers in a source file once the first character has been found. So
  * it's important that it be as fast as possible.
  */
-static inline size_t
+static PRISM_INLINE size_t
 char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
     if (n <= 0) {
         return 0;
@@ -1729,7 +2102,7 @@ const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
 #undef BIT
 #undef PUNCT
 
-static inline bool
+static PRISM_INLINE bool
 char_is_global_name_punctuation(const uint8_t b) {
     const unsigned int i = (const unsigned int) b;
     if (i <= 0x20 || 0x7e < i) return false;
@@ -1737,7 +2110,7 @@ char_is_global_name_punctuation(const uint8_t b) {
     return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
 }
 
-static inline bool
+static PRISM_INLINE bool
 token_is_setter_name(pm_token_t *token) {
     return (
         (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
@@ -1825,7 +2198,7 @@ pm_local_is_keyword(const char *source, size_t length) {
 /**
  * Set the given flag on the given node.
  */
-static inline void
+static PRISM_INLINE void
 pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
     node->flags |= flag;
 }
@@ -1833,7 +2206,7 @@ pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
 /**
  * Remove the given flag from the given node.
  */
-static inline void
+static PRISM_INLINE void
 pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
     node->flags &= (pm_node_flags_t) ~flag;
 }
@@ -1841,7 +2214,7 @@ pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
 /**
  * Set the repeated parameter flag on the given node.
  */
-static inline void
+static PRISM_INLINE void
 pm_node_flag_set_repeated_parameter(pm_node_t *node) {
     assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
             PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
@@ -1869,7 +2242,7 @@ pm_node_flag_set_repeated_parameter(pm_node_t *node) {
 /**
  * Parse out the options for a regular expression.
  */
-static inline pm_node_flags_t
+static PRISM_INLINE pm_node_flags_t
 pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
     pm_node_flags_t flags = 0;
 
@@ -1895,9 +2268,9 @@ pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closin
         size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
         if (unknown_flags_length != 0) {
             const char *word = unknown_flags_length >= 2 ? "options" : "option";
-            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
         }
-        pm_buffer_free(&unknown_flags);
+        pm_buffer_cleanup(&unknown_flags);
     }
 
     return flags;
@@ -1915,36 +2288,45 @@ static size_t
 pm_statements_node_body_length(pm_statements_node_t *node);
 
 /**
- * This function is here to allow us a place to extend in the future when we
- * implement our own arena allocation.
+ * Move an integer's values array into the arena. If the integer has heap-
+ * allocated values, copy them to the arena and free the original.
  */
-static inline void *
-pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
-    void *memory = xcalloc(1, size);
-    if (memory == NULL) {
-        fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
-        abort();
+static PRISM_INLINE void
+pm_integer_arena_move(pm_arena_t *arena, pm_integer_t *integer) {
+    if (integer->values != NULL) {
+        size_t byte_size = integer->length * sizeof(uint32_t);
+        uint32_t *old_values = integer->values;
+        integer->values = (uint32_t *) pm_arena_memdup(arena, old_values, byte_size, PRISM_ALIGNOF(uint32_t));
+        xfree(old_values);
     }
-    return memory;
 }
 
-#define PM_NODE_ALLOC(parser, type) (type *) pm_node_alloc(parser, sizeof(type))
-#define PM_NODE_IDENTIFY(parser) (++parser->node_id)
-
 /**
- * Allocate a new MissingNode node.
+ * Allocate a new ErrorRecoveryNode node with no unexpected child.
  */
-static pm_missing_node_t *
-pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
-    pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
-
-    *node = (pm_missing_node_t) {{
-        .type = PM_MISSING_NODE,
-        .node_id = PM_NODE_IDENTIFY(parser),
-        .location = { .start = start, .end = end }
-    }};
+static pm_error_recovery_node_t *
+pm_error_recovery_node_create(pm_parser_t *parser, uint32_t start, uint32_t length) {
+    return pm_error_recovery_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        ((pm_location_t) { .start = start, .length = length }),
+        NULL
+    );
+}
 
-    return node;
+/**
+ * Allocate a new ErrorRecoveryNode node wrapping an unexpected child node.
+ */
+static pm_error_recovery_node_t *
+pm_error_recovery_node_create_unexpected(pm_parser_t *parser, pm_node_t *unexpected) {
+    return pm_error_recovery_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        unexpected->location,
+        unexpected
+    );
 }
 
 /**
@@ -1953,23 +2335,16 @@ pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t
 static pm_alias_global_variable_node_t *
 pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
     assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
-    pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
-
-    *node = (pm_alias_global_variable_node_t) {
-        {
-            .type = PM_ALIAS_GLOBAL_VARIABLE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = keyword->start,
-                .end = old_name->location.end
-            },
-        },
-        .new_name = new_name,
-        .old_name = old_name,
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
-    };
 
-    return node;
+    return pm_alias_global_variable_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name),
+        new_name,
+        old_name,
+        TOK2LOC(parser, keyword)
+    );
 }
 
 /**
@@ -1978,23 +2353,16 @@ pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyw
 static pm_alias_method_node_t *
 pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
     assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
-    pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
 
-    *node = (pm_alias_method_node_t) {
-        {
-            .type = PM_ALIAS_METHOD_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = keyword->start,
-                .end = old_name->location.end
-            },
-        },
-        .new_name = new_name,
-        .old_name = old_name,
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
-    };
-
-    return node;
+    return pm_alias_method_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name),
+        new_name,
+        old_name,
+        TOK2LOC(parser, keyword)
+    );
 }
 
 /**
@@ -2002,23 +2370,15 @@ pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_n
  */
 static pm_alternation_pattern_node_t *
 pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
-    pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
-
-    *node = (pm_alternation_pattern_node_t) {
-        {
-            .type = PM_ALTERNATION_PATTERN_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = left->location.start,
-                .end = right->location.end
-            },
-        },
-        .left = left,
-        .right = right,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
-    };
-
-    return node;
+    return pm_alternation_pattern_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(left, right),
+        left,
+        right,
+        TOK2LOC(parser, operator)
+    );
 }
 
 /**
@@ -2028,23 +2388,15 @@ static pm_and_node_t *
 pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
     pm_assert_value_expression(parser, left);
 
-    pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
-
-    *node = (pm_and_node_t) {
-        {
-            .type = PM_AND_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = left->location.start,
-                .end = right->location.end
-            },
-        },
-        .left = left,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .right = right
-    };
-
-    return node;
+    return pm_and_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(left, right),
+        left,
+        right,
+        TOK2LOC(parser, operator)
+    );
 }
 
 /**
@@ -2052,18 +2404,13 @@ pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *opera
  */
 static pm_arguments_node_t *
 pm_arguments_node_create(pm_parser_t *parser) {
-    pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
-
-    *node = (pm_arguments_node_t) {
-        {
-            .type = PM_ARGUMENTS_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_NULL_VALUE(parser)
-        },
-        .arguments = { 0 }
-    };
-
-    return node;
+    return pm_arguments_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_UNSET,
+        ((pm_node_list_t) { 0 })
+    );
 }
 
 /**
@@ -2078,19 +2425,22 @@ pm_arguments_node_size(pm_arguments_node_t *node) {
  * Append an argument to an arguments node.
  */
 static void
-pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
+pm_arguments_node_arguments_append(pm_arena_t *arena, pm_arguments_node_t *node, pm_node_t *argument) {
     if (pm_arguments_node_size(node) == 0) {
-        node->base.location.start = argument->location.start;
+        PM_NODE_START_SET_NODE(node, argument);
     }
 
-    node->base.location.end = argument->location.end;
-    pm_node_list_append(&node->arguments, argument);
+    if (PM_NODE_END(node) < PM_NODE_END(argument)) {
+        PM_NODE_LENGTH_SET_NODE(node, argument);
+    }
+
+    pm_node_list_append(arena, &node->arguments, argument);
 
     if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
         if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
-            pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
+            pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
         } else {
-            pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
+            pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
         }
     }
 }
@@ -2100,43 +2450,49 @@ pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argumen
  */
 static pm_array_node_t *
 pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
-    pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
-
-    *node = (pm_array_node_t) {
-        {
-            .type = PM_ARRAY_NODE,
-            .flags = PM_NODE_FLAG_STATIC_LITERAL,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(opening)
-        },
-        .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
-        .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
-        .elements = { 0 }
-    };
-
-    return node;
+    if (opening == NULL) {
+        return pm_array_node_new(
+            parser->arena,
+            ++parser->node_id,
+            PM_NODE_FLAG_STATIC_LITERAL,
+            PM_LOCATION_INIT_UNSET,
+            ((pm_node_list_t) { 0 }),
+            ((pm_location_t) { 0 }),
+            ((pm_location_t) { 0 })
+        );
+    } else {
+        return pm_array_node_new(
+            parser->arena,
+            ++parser->node_id,
+            PM_NODE_FLAG_STATIC_LITERAL,
+            PM_LOCATION_INIT_TOKEN(parser, opening),
+            ((pm_node_list_t) { 0 }),
+            TOK2LOC(parser, opening),
+            TOK2LOC(parser, opening)
+        );
+    }
 }
 
 /**
  * Append an argument to an array node.
  */
-static inline void
-pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
-    if (!node->elements.size && !node->opening_loc.start) {
-        node->base.location.start = element->location.start;
+static PRISM_INLINE void
+pm_array_node_elements_append(pm_arena_t *arena, pm_array_node_t *node, pm_node_t *element) {
+    if (!node->elements.size && !node->opening_loc.length) {
+        PM_NODE_START_SET_NODE(node, element);
     }
 
-    pm_node_list_append(&node->elements, element);
-    node->base.location.end = element->location.end;
+    pm_node_list_append(arena, &node->elements, element);
+    PM_NODE_LENGTH_SET_NODE(node, element);
 
     // If the element is not a static literal, then the array is not a static
     // literal. Turn that flag off.
     if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
-        pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
+        pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
     }
 
     if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
-        pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
+        pm_node_flag_set(UP(node), PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
     }
 }
 
@@ -2144,10 +2500,10 @@ pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
  * Set the closing token and end location of an array node.
  */
 static void
-pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
-    assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
-    node->base.location.end = closing->end;
-    node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
+pm_array_node_close_set(const pm_parser_t *parser, pm_array_node_t *node, const pm_token_t *closing) {
+    assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == 0);
+    PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
+    node->closing_loc = TOK2LOC(parser, closing);
 }
 
 /**
@@ -2156,24 +2512,18 @@ pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
  */
 static pm_array_pattern_node_t *
 pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
-    pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
-
-    *node = (pm_array_pattern_node_t) {
-        {
-            .type = PM_ARRAY_PATTERN_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = nodes->nodes[0]->location.start,
-                .end = nodes->nodes[nodes->size - 1]->location.end
-            },
-        },
-        .constant = NULL,
-        .rest = NULL,
-        .requireds = { 0 },
-        .posts = { 0 },
-        .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
-    };
+    pm_array_pattern_node_t *node = pm_array_pattern_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(nodes->nodes[0], nodes->nodes[nodes->size - 1]),
+        NULL,
+        ((pm_node_list_t) { 0 }),
+        NULL,
+        ((pm_node_list_t) { 0 }),
+        ((pm_location_t) { 0 }),
+        ((pm_location_t) { 0 })
+    );
 
     // For now we're going to just copy over each pointer manually. This could be
     // much more efficient, as we could instead resize the node list.
@@ -2185,9 +2535,9 @@ pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *node
             node->rest = child;
             found_rest = true;
         } else if (found_rest) {
-            pm_node_list_append(&node->posts, child);
+            pm_node_list_append(parser->arena, &node->posts, child);
         } else {
-            pm_node_list_append(&node->requireds, child);
+            pm_node_list_append(parser->arena, &node->requireds, child);
         }
     }
 
@@ -2199,23 +2549,18 @@ pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *node
  */
 static pm_array_pattern_node_t *
 pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
-    pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
-
-    *node = (pm_array_pattern_node_t) {
-        {
-            .type = PM_ARRAY_PATTERN_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = rest->location,
-        },
-        .constant = NULL,
-        .rest = rest,
-        .requireds = { 0 },
-        .posts = { 0 },
-        .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
-    };
-
-    return node;
+    return pm_array_pattern_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODE(rest),
+        NULL,
+        ((pm_node_list_t) { 0 }),
+        rest,
+        ((pm_node_list_t) { 0 }),
+        ((pm_location_t) { 0 }),
+        ((pm_location_t) { 0 })
+    );
 }
 
 /**
@@ -2224,26 +2569,18 @@ pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
  */
 static pm_array_pattern_node_t *
 pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
-    pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
-
-    *node = (pm_array_pattern_node_t) {
-        {
-            .type = PM_ARRAY_PATTERN_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = constant->location.start,
-                .end = closing->end
-            },
-        },
-        .constant = constant,
-        .rest = NULL,
-        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
-        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
-        .requireds = { 0 },
-        .posts = { 0 }
-    };
-
-    return node;
+    return pm_array_pattern_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODE_TOKEN(parser, constant, closing),
+        constant,
+        ((pm_node_list_t) { 0 }),
+        NULL,
+        ((pm_node_list_t) { 0 }),
+        TOK2LOC(parser, opening),
+        TOK2LOC(parser, closing)
+    );
 }
 
 /**
@@ -2252,31 +2589,23 @@ pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant,
  */
 static pm_array_pattern_node_t *
 pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
-    pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
-
-    *node = (pm_array_pattern_node_t) {
-        {
-            .type = PM_ARRAY_PATTERN_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = opening->start,
-                .end = closing->end
-            },
-        },
-        .constant = NULL,
-        .rest = NULL,
-        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
-        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
-        .requireds = { 0 },
-        .posts = { 0 }
-    };
-
-    return node;
+    return pm_array_pattern_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+        NULL,
+        ((pm_node_list_t) { 0 }),
+        NULL,
+        ((pm_node_list_t) { 0 }),
+        TOK2LOC(parser, opening),
+        TOK2LOC(parser, closing)
+    );
 }
 
-static inline void
-pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
-    pm_node_list_append(&node->requireds, inner);
+static PRISM_INLINE void
+pm_array_pattern_node_requireds_append(pm_arena_t *arena, pm_array_pattern_node_t *node, pm_node_t *inner) {
+    pm_node_list_append(arena, &node->requireds, inner);
 }
 
 /**
@@ -2284,15 +2613,14 @@ pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t
  */
 static pm_assoc_node_t *
 pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
-    pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
-    const uint8_t *end;
+    uint32_t end;
 
-    if (value != NULL && value->location.end > key->location.end) {
-        end = value->location.end;
-    } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
-        end = operator->end;
+    if (value != NULL && PM_NODE_END(value) > PM_NODE_END(key)) {
+        end = PM_NODE_END(value);
+    } else if (operator != NULL) {
+        end = PM_TOKEN_END(parser, operator);
     } else {
-        end = key->location.end;
+        end = PM_NODE_END(key);
     }
 
     // Hash string keys will be frozen, so we can mark them as frozen here so
@@ -2312,22 +2640,15 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
         flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
     }
 
-    *node = (pm_assoc_node_t) {
-        {
-            .type = PM_ASSOC_NODE,
-            .flags = flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = key->location.start,
-                .end = end
-            },
-        },
-        .key = key,
-        .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
-
-    return node;
+    return pm_assoc_node_new(
+        parser->arena,
+        ++parser->node_id,
+        flags,
+        ((pm_location_t) { .start = PM_NODE_START(key), .length = U32(end - PM_NODE_START(key)) }),
+        key,
+        value,
+        NTOK2LOC(parser, operator)
+    );
 }
 
 /**
@@ -2336,22 +2657,15 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
 static pm_assoc_splat_node_t *
 pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
     assert(operator->type == PM_TOKEN_USTAR_STAR);
-    pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
-
-    *node = (pm_assoc_splat_node_t) {
-        {
-            .type = PM_ASSOC_SPLAT_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = operator->start,
-                .end = value == NULL ? operator->end : value->location.end
-            },
-        },
-        .value = value,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
-    };
 
-    return node;
+    return pm_assoc_splat_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        (value == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, value),
+        value,
+        TOK2LOC(parser, operator)
+    );
 }
 
 /**
@@ -2360,18 +2674,14 @@ pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token
 static pm_back_reference_read_node_t *
 pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
     assert(name->type == PM_TOKEN_BACK_REFERENCE);
-    pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
 
-    *node = (pm_back_reference_read_node_t) {
-        {
-            .type = PM_BACK_REFERENCE_READ_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(name),
-        },
-        .name = pm_parser_constant_id_token(parser, name)
-    };
-
-    return node;
+    return pm_back_reference_read_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, name),
+        pm_parser_constant_id_token(parser, name)
+    );
 }
 
 /**
@@ -2379,23 +2689,21 @@ pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name)
  */
 static pm_begin_node_t *
 pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
-    pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
-
-    *node = (pm_begin_node_t) {
-        {
-            .type = PM_BEGIN_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = begin_keyword->start,
-                .end = statements == NULL ? begin_keyword->end : statements->base.location.end
-            },
-        },
-        .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
-        .statements = statements,
-        .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
-    };
-
-    return node;
+    uint32_t start = begin_keyword == NULL ? 0 : PM_TOKEN_START(parser, begin_keyword);
+    uint32_t end = statements == NULL ? (begin_keyword == NULL ? 0 : PM_TOKEN_END(parser, begin_keyword)) : PM_NODE_END(statements);
+
+    return pm_begin_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        ((pm_location_t) { .start = start, .length = U32(end - start) }),
+        NTOK2LOC(parser, begin_keyword),
+        statements,
+        NULL,
+        NULL,
+        NULL,
+        ((pm_location_t) { 0 })
+    );
 }
 
 /**
@@ -2403,11 +2711,10 @@ pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_st
  */
 static void
 pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
-    // If the begin keyword doesn't exist, we set the start on the begin_node
-    if (!node->begin_keyword_loc.start) {
-        node->base.location.start = rescue_clause->base.location.start;
+    if (node->begin_keyword_loc.length == 0) {
+        PM_NODE_START_SET_NODE(node, rescue_clause);
     }
-    node->base.location.end = rescue_clause->base.location.end;
+    PM_NODE_LENGTH_SET_NODE(node, rescue_clause);
     node->rescue_clause = rescue_clause;
 }
 
@@ -2416,7 +2723,10 @@ pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_
  */
 static void
 pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
-    node->base.location.end = else_clause->base.location.end;
+    if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
+        PM_NODE_START_SET_NODE(node, else_clause);
+    }
+    PM_NODE_LENGTH_SET_NODE(node, else_clause);
     node->else_clause = else_clause;
 }
 
@@ -2425,7 +2735,10 @@ pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause
  */
 static void
 pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
-    node->base.location.end = ensure_clause->base.location.end;
+    if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
+        PM_NODE_START_SET_NODE(node, ensure_clause);
+    }
+    PM_NODE_LENGTH_SET_NODE(node, ensure_clause);
     node->ensure_clause = ensure_clause;
 }
 
@@ -2433,11 +2746,10 @@ pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_
  * Set the end keyword and end location of a begin node.
  */
 static void
-pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
-    assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
-
-    node->base.location.end = end_keyword->end;
-    node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
+pm_begin_node_end_keyword_set(const pm_parser_t *parser, pm_begin_node_t *node, const pm_token_t *end_keyword) {
+    assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == 0);
+    PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
+    node->end_keyword_loc = TOK2LOC(parser, end_keyword);
 }
 
 /**
@@ -2445,22 +2757,16 @@ pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keywo
  */
 static pm_block_argument_node_t *
 pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
-    pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
-
-    *node = (pm_block_argument_node_t) {
-        {
-            .type = PM_BLOCK_ARGUMENT_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = operator->start,
-                .end = expression == NULL ? operator->end : expression->location.end
-            },
-        },
-        .expression = expression,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
-    };
-
-    return node;
+    assert(operator->type == PM_TOKEN_UAMPERSAND);
+
+    return pm_block_argument_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression),
+        expression,
+        TOK2LOC(parser, operator)
+    );
 }
 
 /**
@@ -2468,22 +2774,17 @@ pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, p
  */
 static pm_block_node_t *
 pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
-    pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
-
-    *node = (pm_block_node_t) {
-        {
-            .type = PM_BLOCK_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = { .start = opening->start, .end = closing->end },
-        },
-        .locals = *locals,
-        .parameters = parameters,
-        .body = body,
-        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
-        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
-    };
-
-    return node;
+    return pm_block_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+        *locals,
+        parameters,
+        body,
+        TOK2LOC(parser, opening),
+        TOK2LOC(parser, closing)
+    );
 }
 
 /**
@@ -2491,24 +2792,17 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const p
  */
 static pm_block_parameter_node_t *
 pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
-    assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
-    pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
-
-    *node = (pm_block_parameter_node_t) {
-        {
-            .type = PM_BLOCK_PARAMETER_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = operator->start,
-                .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
-            },
-        },
-        .name = pm_parser_optional_constant_id_token(parser, name),
-        .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
-    };
-
-    return node;
+    assert(operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
+
+    return pm_block_parameter_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
+        name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
+        NTOK2LOC(parser, name),
+        TOK2LOC(parser, operator)
+    );
 }
 
 /**
@@ -2516,53 +2810,44 @@ pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, cons
  */
 static pm_block_parameters_node_t *
 pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
-    pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
-
-    const uint8_t *start;
-    if (opening->type != PM_TOKEN_NOT_PROVIDED) {
-        start = opening->start;
+    uint32_t start;
+    if (opening != NULL) {
+        start = PM_TOKEN_START(parser, opening);
     } else if (parameters != NULL) {
-        start = parameters->base.location.start;
+        start = PM_NODE_START(parameters);
     } else {
-        start = NULL;
+        start = 0;
     }
 
-    const uint8_t *end;
+    uint32_t end;
     if (parameters != NULL) {
-        end = parameters->base.location.end;
-    } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
-        end = opening->end;
+        end = PM_NODE_END(parameters);
+    } else if (opening != NULL) {
+        end = PM_TOKEN_END(parser, opening);
     } else {
-        end = NULL;
-    }
-
-    *node = (pm_block_parameters_node_t) {
-        {
-            .type = PM_BLOCK_PARAMETERS_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = start,
-                .end = end
-            }
-        },
-        .parameters = parameters,
-        .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
-        .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .locals = { 0 }
-    };
-
-    return node;
+        end = 0;
+    }
+
+    return pm_block_parameters_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        ((pm_location_t) { .start = start, .length = U32(end - start) }),
+        parameters,
+        ((pm_node_list_t) { 0 }),
+        NTOK2LOC(parser, opening),
+        ((pm_location_t) { 0 })
+    );
 }
 
 /**
  * Set the closing location of a BlockParametersNode node.
  */
 static void
-pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
-    assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
-
-    node->base.location.end = closing->end;
-    node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
+pm_block_parameters_node_closing_set(const pm_parser_t *parser, pm_block_parameters_node_t *node, const pm_token_t *closing) {
+    assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == 0);
+    PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
+    node->closing_loc = TOK2LOC(parser, closing);
 }
 
 /**
@@ -2570,29 +2855,27 @@ pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_
  */
 static pm_block_local_variable_node_t *
 pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
-    pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
-
-    *node = (pm_block_local_variable_node_t) {
-        {
-            .type = PM_BLOCK_LOCAL_VARIABLE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(name),
-        },
-        .name = pm_parser_constant_id_token(parser, name)
-    };
-
-    return node;
+    return pm_block_local_variable_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, name),
+        pm_parser_constant_id_token(parser, name)
+    );
 }
 
 /**
  * Append a new block-local variable to a BlockParametersNode node.
  */
 static void
-pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
-    pm_node_list_append(&node->locals, (pm_node_t *) local);
+pm_block_parameters_node_append_local(pm_arena_t *arena, pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
+    pm_node_list_append(arena, &node->locals, UP(local));
+
+    if (PM_NODE_LENGTH(node) == 0) {
+        PM_NODE_START_SET_NODE(node, local);
+    }
 
-    if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
-    node->base.location.end = local->base.location.end;
+    PM_NODE_LENGTH_SET_NODE(node, local);
 }
 
 /**
@@ -2601,66 +2884,55 @@ pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm
 static pm_break_node_t *
 pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
     assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
-    pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
 
-    *node = (pm_break_node_t) {
-        {
-            .type = PM_BREAK_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = keyword->start,
-                .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
-            },
-        },
-        .arguments = arguments,
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
-    };
-
-    return node;
+    return pm_break_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
+        arguments,
+        TOK2LOC(parser, keyword)
+    );
 }
 
 // There are certain flags that we want to use internally but don't want to
 // expose because they are not relevant beyond parsing. Therefore we'll define
 // them here and not define them in config.yml/a header file.
-static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = 0x4;
-static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = 0x40;
-static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = 0x80;
-static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = 0x100;
+static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);
+
+static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
+static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
+static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
 
 /**
- * Allocate and initialize a new CallNode node. This sets everything to NULL or
- * PM_TOKEN_NOT_PROVIDED as appropriate such that its values can be overridden
- * in the various specializations of this function.
+ * Allocate and initialize a new CallNode node. This sets everything to NULL
+ * such that its values can be overridden in the various specializations of this
+ * function.
  */
 static pm_call_node_t *
 pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
-    pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
-
-    *node = (pm_call_node_t) {
-        {
-            .type = PM_CALL_NODE,
-            .flags = flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_NULL_VALUE(parser),
-        },
-        .receiver = NULL,
-        .call_operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .message_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .arguments = NULL,
-        .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .block = NULL,
-        .name = 0
-    };
-
-    return node;
+    return pm_call_node_new(
+        parser->arena,
+        ++parser->node_id,
+        flags,
+        PM_LOCATION_INIT_UNSET,
+        NULL,
+        ((pm_location_t) { 0 }),
+        0,
+        ((pm_location_t) { 0 }),
+        ((pm_location_t) { 0 }),
+        NULL,
+        ((pm_location_t) { 0 }),
+        ((pm_location_t) { 0 }),
+        NULL
+    );
 }
 
 /**
  * Returns the value that the ignore visibility flag should be set to for the
  * given receiver.
  */
-static inline pm_node_flags_t
+static PRISM_INLINE pm_node_flags_t
 pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
     return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
 }
@@ -2680,12 +2952,15 @@ pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_
 
     pm_call_node_t *node = pm_call_node_create(parser, flags);
 
-    node->base.location.start = receiver->location.start;
-    node->base.location.end = pm_arguments_end(arguments);
+    PM_NODE_START_SET_NODE(node, receiver);
+
+    const pm_location_t *end = pm_arguments_end(arguments);
+    assert(end != NULL && "unreachable");
+    PM_NODE_LENGTH_SET_LOCATION(node, end);
 
     node->receiver = receiver;
     node->message_loc.start = arguments->opening_loc.start;
-    node->message_loc.end = arguments->closing_loc.end;
+    node->message_loc.length = (arguments->closing_loc.start + arguments->closing_loc.length) - arguments->opening_loc.start;
 
     node->opening_loc = arguments->opening_loc;
     node->arguments = arguments->arguments;
@@ -2706,20 +2981,22 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t
 
     pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
 
-    node->base.location.start = MIN(receiver->location.start, argument->location.start);
-    node->base.location.end = MAX(receiver->location.end, argument->location.end);
+    PM_NODE_START_SET_NODE(node, PM_NODE_START(receiver) < PM_NODE_START(argument) ? receiver : argument);
+    PM_NODE_LENGTH_SET_NODE(node, PM_NODE_END(receiver) > PM_NODE_END(argument) ? receiver : argument);
 
     node->receiver = receiver;
-    node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+    node->message_loc = TOK2LOC(parser, operator);
 
     pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
-    pm_arguments_node_arguments_append(arguments, argument);
+    pm_arguments_node_arguments_append(parser->arena, arguments, argument);
     node->arguments = arguments;
 
     node->name = pm_parser_constant_id_token(parser, operator);
     return node;
 }
 
+static const uint8_t * parse_operator_symbol_name(const pm_token_t *);
+
 /**
  * Allocate and initialize a new CallNode node from a call expression.
  */
@@ -2729,26 +3006,31 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
 
     pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
 
-    node->base.location.start = receiver->location.start;
-    const uint8_t *end = pm_arguments_end(arguments);
+    PM_NODE_START_SET_NODE(node, receiver);
+    const pm_location_t *end = pm_arguments_end(arguments);
     if (end == NULL) {
-        end = message->end;
+        PM_NODE_LENGTH_SET_TOKEN(parser, node, message);
+    } else {
+        PM_NODE_LENGTH_SET_LOCATION(node, end);
     }
-    node->base.location.end = end;
 
     node->receiver = receiver;
-    node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
-    node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
+    node->call_operator_loc = TOK2LOC(parser, operator);
+    node->message_loc = TOK2LOC(parser, message);
     node->opening_loc = arguments->opening_loc;
     node->arguments = arguments->arguments;
     node->closing_loc = arguments->closing_loc;
     node->block = arguments->block;
 
     if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
-        pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
+        pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
     }
 
-    node->name = pm_parser_constant_id_token(parser, message);
+    /**
+     * If the final character is `@`, as is the case for `foo.~@`, we
+     * should ignore the `@` in the same way that we do for symbols.
+     */
+    node->name = pm_parser_constant_id_raw(parser, message->start, parse_operator_symbol_name(message));
     return node;
 }
 
@@ -2758,12 +3040,9 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
 static pm_call_node_t *
 pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
     pm_call_node_t *node = pm_call_node_create(parser, 0);
-    node->base.location.start = parser->start;
-    node->base.location.end = parser->end;
+    node->base.location = (pm_location_t) { .start = 0, .length = U32(parser->end - parser->start) };
 
     node->receiver = receiver;
-    node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
-    node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
     node->arguments = arguments;
 
     node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
@@ -2778,10 +3057,12 @@ static pm_call_node_t *
 pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
     pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
 
-    node->base.location.start = message->start;
-    node->base.location.end = pm_arguments_end(arguments);
+    PM_NODE_START_SET_TOKEN(parser, node, message);
+    const pm_location_t *end = pm_arguments_end(arguments);
+    assert(end != NULL && "unreachable");
+    PM_NODE_LENGTH_SET_LOCATION(node, end);
 
-    node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
+    node->message_loc = TOK2LOC(parser, message);
     node->opening_loc = arguments->opening_loc;
     node->arguments = arguments->arguments;
     node->closing_loc = arguments->closing_loc;
@@ -2799,7 +3080,7 @@ static pm_call_node_t *
 pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
     pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
 
-    node->base.location = PM_LOCATION_NULL_VALUE(parser);
+    node->base.location = (pm_location_t) { 0 };
     node->arguments = arguments;
 
     node->name = name;
@@ -2816,16 +3097,16 @@ pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *me
 
     pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
 
-    node->base.location.start = message->start;
-    if (arguments->closing_loc.start != NULL) {
-        node->base.location.end = arguments->closing_loc.end;
+    PM_NODE_START_SET_TOKEN(parser, node, message);
+    if (arguments->closing_loc.length > 0) {
+        PM_NODE_LENGTH_SET_LOCATION(node, &arguments->closing_loc);
     } else {
         assert(receiver != NULL);
-        node->base.location.end = receiver->location.end;
+        PM_NODE_LENGTH_SET_NODE(node, receiver);
     }
 
     node->receiver = receiver;
-    node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
+    node->message_loc = TOK2LOC(parser, message);
     node->opening_loc = arguments->opening_loc;
     node->arguments = arguments->arguments;
     node->closing_loc = arguments->closing_loc;
@@ -2843,18 +3124,20 @@ pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token
 
     pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
 
-    node->base.location.start = receiver->location.start;
-    node->base.location.end = pm_arguments_end(arguments);
+    PM_NODE_START_SET_NODE(node, receiver);
+    const pm_location_t *end = pm_arguments_end(arguments);
+    assert(end != NULL && "unreachable");
+    PM_NODE_LENGTH_SET_LOCATION(node, end);
 
     node->receiver = receiver;
-    node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+    node->call_operator_loc = TOK2LOC(parser, operator);
     node->opening_loc = arguments->opening_loc;
     node->arguments = arguments->arguments;
     node->closing_loc = arguments->closing_loc;
     node->block = arguments->block;
 
     if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
-        pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
+        pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
     }
 
     node->name = pm_parser_constant_id_constant(parser, "call", 4);
@@ -2870,11 +3153,11 @@ pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *
 
     pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
 
-    node->base.location.start = operator->start;
-    node->base.location.end = receiver->location.end;
+    PM_NODE_START_SET_TOKEN(parser, node, operator);
+    PM_NODE_LENGTH_SET_NODE(node, receiver);
 
     node->receiver = receiver;
-    node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+    node->message_loc = TOK2LOC(parser, operator);
 
     node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
     return node;
@@ -2888,8 +3171,8 @@ static pm_call_node_t *
 pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
     pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
 
-    node->base.location = PM_LOCATION_TOKEN_VALUE(message);
-    node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
+    node->base.location = TOK2LOC(parser, message);
+    node->message_loc = TOK2LOC(parser, message);
 
     node->name = pm_parser_constant_id_token(parser, message);
     return node;
@@ -2899,14 +3182,14 @@ pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
  * Returns whether or not this call can be used on the left-hand side of an
  * operator assignment.
  */
-static inline bool
+static PRISM_INLINE bool
 pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
     return (
-        (node->message_loc.start != NULL) &&
-        (node->message_loc.end[-1] != '!') &&
-        (node->message_loc.end[-1] != '?') &&
-        char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) &&
-        (node->opening_loc.start == NULL) &&
+        (node->message_loc.length > 0) &&
+        (parser->start[node->message_loc.start + node->message_loc.length - 1] != '!') &&
+        (parser->start[node->message_loc.start + node->message_loc.length - 1] != '?') &&
+        char_is_identifier_start(parser, parser->start + node->message_loc.start, (ptrdiff_t) node->message_loc.length) &&
+        (node->opening_loc.length == 0) &&
         (node->arguments == NULL) &&
         (node->block == NULL)
     );
@@ -2922,10 +3205,10 @@ pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, p
     if (write_constant->length > 0) {
         size_t length = write_constant->length - 1;
 
-        void *memory = xmalloc(length);
+        uint8_t *memory = (uint8_t *) pm_arena_alloc(parser->arena, length, 1);
         memcpy(memory, write_constant->start, length);
 
-        *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
+        *read_name = pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, memory, length);
     } else {
         // We can get here if the message was missing because of a syntax error.
         *read_name = pm_parser_constant_id_constant(parser, "", 0);
@@ -2939,33 +3222,25 @@ static pm_call_and_write_node_t *
 pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
     assert(target->block == NULL);
     assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
-    pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
 
-    *node = (pm_call_and_write_node_t) {
-        {
-            .type = PM_CALL_AND_WRITE_NODE,
-            .flags = target->base.flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .receiver = target->receiver,
-        .call_operator_loc = target->call_operator_loc,
-        .message_loc = target->message_loc,
-        .read_name = 0,
-        .write_name = target->name,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
+    pm_call_and_write_node_t *node = pm_call_and_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        FL(target),
+        PM_LOCATION_INIT_NODES(target, value),
+        target->receiver,
+        target->call_operator_loc,
+        target->message_loc,
+        0,
+        target->name,
+        TOK2LOC(parser, operator),
+        value
+    );
 
     pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
 
-    // Here we're going to free the target, since it is no longer necessary.
-    // However, we don't want to call `pm_node_destroy` because we want to keep
-    // around all of its children since we just reused them.
-    xfree(target);
+    // The target is no longer necessary because we've reused its children.
+    // It is arena-allocated so no explicit free is needed.
 
     return node;
 }
@@ -2976,7 +3251,7 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
  */
 static void
 pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
-    if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
+    if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
         if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
             pm_node_t *node;
             PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
@@ -2999,35 +3274,28 @@ pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *argumen
 static pm_index_and_write_node_t *
 pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
     assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
-    pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
 
     pm_index_arguments_check(parser, target->arguments, target->block);
 
     assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
-    *node = (pm_index_and_write_node_t) {
-        {
-            .type = PM_INDEX_AND_WRITE_NODE,
-            .flags = target->base.flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .receiver = target->receiver,
-        .call_operator_loc = target->call_operator_loc,
-        .opening_loc = target->opening_loc,
-        .arguments = target->arguments,
-        .closing_loc = target->closing_loc,
-        .block = (pm_block_argument_node_t *) target->block,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
 
-    // Here we're going to free the target, since it is no longer necessary.
-    // However, we don't want to call `pm_node_destroy` because we want to keep
-    // around all of its children since we just reused them.
-    xfree(target);
+    pm_index_and_write_node_t *node = pm_index_and_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        FL(target),
+        PM_LOCATION_INIT_NODES(target, value),
+        target->receiver,
+        target->call_operator_loc,
+        target->opening_loc,
+        target->arguments,
+        target->closing_loc,
+        (pm_block_argument_node_t *) target->block,
+        TOK2LOC(parser, operator),
+        value
+    );
+
+    // The target is no longer necessary because we've reused its children.
+    // It is arena-allocated so no explicit free is needed.
 
     return node;
 }
@@ -3038,34 +3306,26 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons
 static pm_call_operator_write_node_t *
 pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
     assert(target->block == NULL);
-    pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
 
-    *node = (pm_call_operator_write_node_t) {
-        {
-            .type = PM_CALL_OPERATOR_WRITE_NODE,
-            .flags = target->base.flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .receiver = target->receiver,
-        .call_operator_loc = target->call_operator_loc,
-        .message_loc = target->message_loc,
-        .read_name = 0,
-        .write_name = target->name,
-        .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
-        .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
+    pm_call_operator_write_node_t *node = pm_call_operator_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        FL(target),
+        PM_LOCATION_INIT_NODES(target, value),
+        target->receiver,
+        target->call_operator_loc,
+        target->message_loc,
+        0,
+        target->name,
+        pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
+        TOK2LOC(parser, operator),
+        value
+    );
 
     pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
 
-    // Here we're going to free the target, since it is no longer necessary.
-    // However, we don't want to call `pm_node_destroy` because we want to keep
-    // around all of its children since we just reused them.
-    xfree(target);
+    // The target is no longer necessary because we've reused its children.
+    // It is arena-allocated so no explicit free is needed.
 
     return node;
 }
@@ -3075,36 +3335,28 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
  */
 static pm_index_operator_write_node_t *
 pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
-    pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
-
     pm_index_arguments_check(parser, target->arguments, target->block);
 
     assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
-    *node = (pm_index_operator_write_node_t) {
-        {
-            .type = PM_INDEX_OPERATOR_WRITE_NODE,
-            .flags = target->base.flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .receiver = target->receiver,
-        .call_operator_loc = target->call_operator_loc,
-        .opening_loc = target->opening_loc,
-        .arguments = target->arguments,
-        .closing_loc = target->closing_loc,
-        .block = (pm_block_argument_node_t *) target->block,
-        .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
-        .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
 
-    // Here we're going to free the target, since it is no longer necessary.
-    // However, we don't want to call `pm_node_destroy` because we want to keep
-    // around all of its children since we just reused them.
-    xfree(target);
+    pm_index_operator_write_node_t *node = pm_index_operator_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        FL(target),
+        PM_LOCATION_INIT_NODES(target, value),
+        target->receiver,
+        target->call_operator_loc,
+        target->opening_loc,
+        target->arguments,
+        target->closing_loc,
+        (pm_block_argument_node_t *) target->block,
+        pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
+        TOK2LOC(parser, operator),
+        value
+    );
+
+    // The target is no longer necessary because we've reused its children.
+    // It is arena-allocated so no explicit free is needed.
 
     return node;
 }
@@ -3116,33 +3368,25 @@ static pm_call_or_write_node_t *
 pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
     assert(target->block == NULL);
     assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
-    pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
 
-    *node = (pm_call_or_write_node_t) {
-        {
-            .type = PM_CALL_OR_WRITE_NODE,
-            .flags = target->base.flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .receiver = target->receiver,
-        .call_operator_loc = target->call_operator_loc,
-        .message_loc = target->message_loc,
-        .read_name = 0,
-        .write_name = target->name,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
+    pm_call_or_write_node_t *node = pm_call_or_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        FL(target),
+        PM_LOCATION_INIT_NODES(target, value),
+        target->receiver,
+        target->call_operator_loc,
+        target->message_loc,
+        0,
+        target->name,
+        TOK2LOC(parser, operator),
+        value
+    );
 
     pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
 
-    // Here we're going to free the target, since it is no longer necessary.
-    // However, we don't want to call `pm_node_destroy` because we want to keep
-    // around all of its children since we just reused them.
-    xfree(target);
+    // The target is no longer necessary because we've reused its children.
+    // It is arena-allocated so no explicit free is needed.
 
     return node;
 }
@@ -3153,35 +3397,28 @@ pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
 static pm_index_or_write_node_t *
 pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
     assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
-    pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
 
     pm_index_arguments_check(parser, target->arguments, target->block);
 
     assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
-    *node = (pm_index_or_write_node_t) {
-        {
-            .type = PM_INDEX_OR_WRITE_NODE,
-            .flags = target->base.flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .receiver = target->receiver,
-        .call_operator_loc = target->call_operator_loc,
-        .opening_loc = target->opening_loc,
-        .arguments = target->arguments,
-        .closing_loc = target->closing_loc,
-        .block = (pm_block_argument_node_t *) target->block,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
 
-    // Here we're going to free the target, since it is no longer necessary.
-    // However, we don't want to call `pm_node_destroy` because we want to keep
-    // around all of its children since we just reused them.
-    xfree(target);
+    pm_index_or_write_node_t *node = pm_index_or_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        FL(target),
+        PM_LOCATION_INIT_NODES(target, value),
+        target->receiver,
+        target->call_operator_loc,
+        target->opening_loc,
+        target->arguments,
+        target->closing_loc,
+        (pm_block_argument_node_t *) target->block,
+        TOK2LOC(parser, operator),
+        value
+    );
+
+    // The target is no longer necessary because we've reused its children.
+    // It is arena-allocated so no explicit free is needed.
 
     return node;
 }
@@ -3192,25 +3429,27 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
  */
 static pm_call_target_node_t *
 pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
-    pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
+    pm_call_target_node_t *node = pm_call_target_node_new(
+        parser->arena,
+        ++parser->node_id,
+        FL(target),
+        PM_LOCATION_INIT_NODE(target),
+        target->receiver,
+        target->call_operator_loc,
+        target->name,
+        target->message_loc
+    );
 
-    *node = (pm_call_target_node_t) {
-        {
-            .type = PM_CALL_TARGET_NODE,
-            .flags = target->base.flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = target->base.location
-        },
-        .receiver = target->receiver,
-        .call_operator_loc = target->call_operator_loc,
-        .name = target->name,
-        .message_loc = target->message_loc
-    };
+    /* We can get here after parsing invalid syntax in which the call operator
+     * was not present. That is a problem because the call operator is a
+     * required location on this node, so in that case we fill it in with a
+     * fake location (the target's own) so that the syntax tree remains valid. */
+    if (node->call_operator_loc.length == 0) {
+        node->call_operator_loc = target->base.location;
+    }
 
-    // Here we're going to free the target, since it is no longer necessary.
-    // However, we don't want to call `pm_node_destroy` because we want to keep
-    // around all of its children since we just reused them.
-    xfree(target);
+    // The target is no longer necessary because we've reused its children.
+    // It is arena-allocated so no explicit free is needed.
 
     return node;
 }
@@ -3221,30 +3460,23 @@ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
  */
 static pm_index_target_node_t *
 pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
-    pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
-    pm_node_flags_t flags = target->base.flags;
-
     pm_index_arguments_check(parser, target->arguments, target->block);
-
     assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
-    *node = (pm_index_target_node_t) {
-        {
-            .type = PM_INDEX_TARGET_NODE,
-            .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = target->base.location
-        },
-        .receiver = target->receiver,
-        .opening_loc = target->opening_loc,
-        .arguments = target->arguments,
-        .closing_loc = target->closing_loc,
-        .block = (pm_block_argument_node_t *) target->block,
-    };
 
-    // Here we're going to free the target, since it is no longer necessary.
-    // However, we don't want to call `pm_node_destroy` because we want to keep
-    // around all of its children since we just reused them.
-    xfree(target);
+    pm_index_target_node_t *node = pm_index_target_node_new(
+        parser->arena,
+        ++parser->node_id,
+        FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
+        PM_LOCATION_INIT_NODE(target),
+        target->receiver,
+        target->opening_loc,
+        target->arguments,
+        target->closing_loc,
+        (pm_block_argument_node_t *) target->block
+    );
+
+    // The target is no longer necessary because we've reused its children.
+    // It is arena-allocated so no explicit free is needed.
 
     return node;
 }
@@ -3254,23 +3486,15 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
  */
 static pm_capture_pattern_node_t *
 pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
-    pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
-
-    *node = (pm_capture_pattern_node_t) {
-        {
-            .type = PM_CAPTURE_PATTERN_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = value->location.start,
-                .end = target->base.location.end
-            },
-        },
-        .value = value,
-        .target = target,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
-    };
-
-    return node;
+    return pm_capture_pattern_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(value, target),
+        value,
+        target,
+        TOK2LOC(parser, operator)
+    );
 }
 
 /**
@@ -3278,36 +3502,28 @@ pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_v
  */
 static pm_case_node_t *
 pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
-    pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
-
-    *node = (pm_case_node_t) {
-        {
-            .type = PM_CASE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = case_keyword->start,
-                .end = end_keyword->end
-            },
-        },
-        .predicate = predicate,
-        .else_clause = NULL,
-        .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
-        .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
-        .conditions = { 0 }
-    };
-
-    return node;
+    return pm_case_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, case_keyword, end_keyword == NULL ? case_keyword : end_keyword),
+        predicate,
+        ((pm_node_list_t) { 0 }),
+        NULL,
+        TOK2LOC(parser, case_keyword),
+        NTOK2LOC(parser, end_keyword)
+    );
 }
 
 /**
  * Append a new condition to a CaseNode node.
  */
 static void
-pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
+pm_case_node_condition_append(pm_arena_t *arena, pm_case_node_t *node, pm_node_t *condition) {
     assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
 
-    pm_node_list_append(&node->conditions, condition);
-    node->base.location.end = condition->location.end;
+    pm_node_list_append(arena, &node->conditions, condition);
+    PM_NODE_LENGTH_SET_NODE(node, condition);
 }
 
 /**
@@ -3316,53 +3532,45 @@ pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
 static void
 pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
     node->else_clause = else_clause;
-    node->base.location.end = else_clause->base.location.end;
+    PM_NODE_LENGTH_SET_NODE(node, else_clause);
 }
 
 /**
  * Set the end location for a CaseNode node.
  */
 static void
-pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
-    node->base.location.end = end_keyword->end;
-    node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
+pm_case_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_node_t *node, const pm_token_t *end_keyword) {
+    PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
+    node->end_keyword_loc = TOK2LOC(parser, end_keyword);
 }
 
 /**
  * Allocate and initialize a new CaseMatchNode node.
  */
 static pm_case_match_node_t *
-pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
-    pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
-
-    *node = (pm_case_match_node_t) {
-        {
-            .type = PM_CASE_MATCH_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = case_keyword->start,
-                .end = end_keyword->end
-            },
-        },
-        .predicate = predicate,
-        .else_clause = NULL,
-        .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
-        .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
-        .conditions = { 0 }
-    };
-
-    return node;
+pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate) {
+    return pm_case_match_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, case_keyword),
+        predicate,
+        ((pm_node_list_t) { 0 }),
+        NULL,
+        TOK2LOC(parser, case_keyword),
+        ((pm_location_t) { 0 })
+    );
 }
 
 /**
  * Append a new condition to a CaseMatchNode node.
  */
 static void
-pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
+pm_case_match_node_condition_append(pm_arena_t *arena, pm_case_match_node_t *node, pm_node_t *condition) {
     assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
 
-    pm_node_list_append(&node->conditions, condition);
-    node->base.location.end = condition->location.end;
+    pm_node_list_append(arena, &node->conditions, condition);
+    PM_NODE_LENGTH_SET_NODE(node, condition);
 }
 
 /**
@@ -3371,16 +3579,16 @@ pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condi
 static void
 pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
     node->else_clause = else_clause;
-    node->base.location.end = else_clause->base.location.end;
+    PM_NODE_LENGTH_SET_NODE(node, else_clause);
 }
 
 /**
  * Set the end location for a CaseMatchNode node.
  */
 static void
-pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
-    node->base.location.end = end_keyword->end;
-    node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
+pm_case_match_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_match_node_t *node, const pm_token_t *end_keyword) {
+    PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
+    node->end_keyword_loc = TOK2LOC(parser, end_keyword);
 }
 
 /**
@@ -3388,25 +3596,20 @@ pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_toke
  */
 static pm_class_node_t *
 pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
-    pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
-
-    *node = (pm_class_node_t) {
-        {
-            .type = PM_CLASS_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = { .start = class_keyword->start, .end = end_keyword->end },
-        },
-        .locals = *locals,
-        .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
-        .constant_path = constant_path,
-        .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
-        .superclass = superclass,
-        .body = body,
-        .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
-        .name = pm_parser_constant_id_token(parser, name)
-    };
-
-    return node;
+    return pm_class_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword),
+        *locals,
+        TOK2LOC(parser, class_keyword),
+        constant_path,
+        NTOK2LOC(parser, inheritance_operator),
+        superclass,
+        body,
+        TOK2LOC(parser, end_keyword),
+        pm_parser_constant_id_token(parser, name)
+    );
 }
 
 /**
@@ -3415,24 +3618,17 @@ pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const p
 static pm_class_variable_and_write_node_t *
 pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
     assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
-    pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
 
-    *node = (pm_class_variable_and_write_node_t) {
-        {
-            .type = PM_CLASS_VARIABLE_AND_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .name = target->name,
-        .name_loc = target->base.location,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
-
-    return node;
+    return pm_class_variable_and_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(target, value),
+        target->name,
+        target->base.location,
+        TOK2LOC(parser, operator),
+        value
+    );
 }
 
 /**
@@ -3440,25 +3636,17 @@ pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_r
  */
 static pm_class_variable_operator_write_node_t *
 pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
-    pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
-
-    *node = (pm_class_variable_operator_write_node_t) {
-        {
-            .type = PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .name = target->name,
-        .name_loc = target->base.location,
-        .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value,
-        .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
-    };
-
-    return node;
+    return pm_class_variable_operator_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(target, value),
+        target->name,
+        target->base.location,
+        TOK2LOC(parser, operator),
+        value,
+        pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
+    );
 }
 
 /**
@@ -3467,24 +3655,17 @@ pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_varia
 static pm_class_variable_or_write_node_t *
 pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
     assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
-    pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
-
-    *node = (pm_class_variable_or_write_node_t) {
-        {
-            .type = PM_CLASS_VARIABLE_OR_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .name = target->name,
-        .name_loc = target->base.location,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
 
-    return node;
+    return pm_class_variable_or_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(target, value),
+        target->name,
+        target->base.location,
+        TOK2LOC(parser, operator),
+        value
+    );
 }
 
 /**
@@ -3493,18 +3674,14 @@ pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_re
 static pm_class_variable_read_node_t *
 pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_CLASS_VARIABLE);
-    pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
-
-    *node = (pm_class_variable_read_node_t) {
-        {
-            .type = PM_CLASS_VARIABLE_READ_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(token)
-        },
-        .name = pm_parser_constant_id_token(parser, token)
-    };
 
-    return node;
+    return pm_class_variable_read_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, token),
+        pm_parser_constant_id_token(parser, token)
+    );
 }
 
 /**
@@ -3513,9 +3690,9 @@ pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token)
  *     a = *b
  *     a = 1, 2, 3
  */
-static inline pm_node_flags_t
+static PRISM_INLINE pm_node_flags_t
 pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
-    if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
+    if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.length == 0) {
         return flags;
     }
     return 0;
@@ -3526,25 +3703,16 @@ pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
  */
 static pm_class_variable_write_node_t *
 pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
-    pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
-
-    *node = (pm_class_variable_write_node_t) {
-        {
-            .type = PM_CLASS_VARIABLE_WRITE_NODE,
-            .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = read_node->base.location.start,
-                .end = value->location.end
-            },
-        },
-        .name = read_node->name,
-        .name_loc = PM_LOCATION_NODE_VALUE((pm_node_t *) read_node),
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
-
-    return node;
+    return pm_class_variable_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+        PM_LOCATION_INIT_NODES(read_node, value),
+        read_node->name,
+        read_node->base.location,
+        value,
+        TOK2LOC(parser, operator)
+    );
 }
 
 /**
@@ -3553,23 +3721,16 @@ pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_
 static pm_constant_path_and_write_node_t *
 pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
     assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
-    pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
-
-    *node = (pm_constant_path_and_write_node_t) {
-        {
-            .type = PM_CONSTANT_PATH_AND_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .target = target,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
 
-    return node;
+    return pm_constant_path_and_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(target, value),
+        target,
+        TOK2LOC(parser, operator),
+        value
+    );
 }
 
 /**
@@ -3577,24 +3738,16 @@ pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_nod
  */
 static pm_constant_path_operator_write_node_t *
 pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
-    pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
-
-    *node = (pm_constant_path_operator_write_node_t) {
-        {
-            .type = PM_CONSTANT_PATH_OPERATOR_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .target = target,
-        .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value,
-        .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
-    };
-
-    return node;
+    return pm_constant_path_operator_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(target, value),
+        target,
+        TOK2LOC(parser, operator),
+        value,
+        pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
+    );
 }
 
 /**
@@ -3603,23 +3756,16 @@ pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_pat
 static pm_constant_path_or_write_node_t *
 pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
     assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
-    pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
 
-    *node = (pm_constant_path_or_write_node_t) {
-        {
-            .type = PM_CONSTANT_PATH_OR_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .target = target,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
-
-    return node;
+    return pm_constant_path_or_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(target, value),
+        target,
+        TOK2LOC(parser, operator),
+        value
+    );
 }
 
 /**
@@ -3628,29 +3774,22 @@ pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node
 static pm_constant_path_node_t *
 pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
     pm_assert_value_expression(parser, parent);
-    pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
 
     pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
     if (name_token->type == PM_TOKEN_CONSTANT) {
         name = pm_parser_constant_id_token(parser, name_token);
     }
 
-    *node = (pm_constant_path_node_t) {
-        {
-            .type = PM_CONSTANT_PATH_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = parent == NULL ? delimiter->start : parent->location.start,
-                .end = name_token->end
-            },
-        },
-        .parent = parent,
-        .name = name,
-        .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
-        .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
-    };
-
-    return node;
+    return pm_constant_path_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        (parent == NULL) ? PM_LOCATION_INIT_TOKENS(parser, delimiter, name_token) : PM_LOCATION_INIT_NODE_TOKEN(parser, parent, name_token),
+        parent,
+        name,
+        TOK2LOC(parser, delimiter),
+        TOK2LOC(parser, name_token)
+    );
 }
 
 /**
@@ -3658,24 +3797,15 @@ pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_to
  */
 static pm_constant_path_write_node_t *
 pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
-    pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
-
-    *node = (pm_constant_path_write_node_t) {
-        {
-            .type = PM_CONSTANT_PATH_WRITE_NODE,
-            .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            },
-        },
-        .target = target,
-        .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
-
-    return node;
+    return pm_constant_path_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+        PM_LOCATION_INIT_NODES(target, value),
+        target,
+        TOK2LOC(parser, operator),
+        value
+    );
 }
 
 /**
@@ -3684,24 +3814,17 @@ pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t
 static pm_constant_and_write_node_t *
 pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
     assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
-    pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
 
-    *node = (pm_constant_and_write_node_t) {
-        {
-            .type = PM_CONSTANT_AND_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .name = target->name,
-        .name_loc = target->base.location,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
-
-    return node;
+    return pm_constant_and_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(target, value),
+        target->name,
+        target->base.location,
+        TOK2LOC(parser, operator),
+        value
+    );
 }
 
 /**
@@ -3709,25 +3832,17 @@ pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *
  */
 static pm_constant_operator_write_node_t *
 pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
-    pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
-
-    *node = (pm_constant_operator_write_node_t) {
-        {
-            .type = PM_CONSTANT_OPERATOR_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .name = target->name,
-        .name_loc = target->base.location,
-        .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value,
-        .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
-    };
-
-    return node;
+    return pm_constant_operator_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(target, value),
+        target->name,
+        target->base.location,
+        TOK2LOC(parser, operator),
+        value,
+        pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
+    );
 }
 
 /**
@@ -3736,24 +3851,17 @@ pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_nod
 static pm_constant_or_write_node_t *
 pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
     assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
-    pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
-
-    *node = (pm_constant_or_write_node_t) {
-        {
-            .type = PM_CONSTANT_OR_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .name = target->name,
-        .name_loc = target->base.location,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
 
-    return node;
+    return pm_constant_or_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(target, value),
+        target->name,
+        target->base.location,
+        TOK2LOC(parser, operator),
+        value
+    );
 }
 
 /**
@@ -3761,19 +3869,15 @@ pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *t
  */
 static pm_constant_read_node_t *
 pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
-    assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
-    pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
-
-    *node = (pm_constant_read_node_t) {
-        {
-            .type = PM_CONSTANT_READ_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(name)
-        },
-        .name = pm_parser_constant_id_token(parser, name)
-    };
-
-    return node;
+    assert(name->type == PM_TOKEN_CONSTANT || name->type == 0);
+
+    return pm_constant_read_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, name),
+        pm_parser_constant_id_token(parser, name)
+    );
 }
 
 /**
@@ -3781,25 +3885,16 @@ pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
  */
 static pm_constant_write_node_t *
 pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
-    pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
-
-    *node = (pm_constant_write_node_t) {
-        {
-            .type = PM_CONSTANT_WRITE_NODE,
-            .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .name = target->name,
-        .name_loc = target->base.location,
-        .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
-
-    return node;
+    return pm_constant_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+        PM_LOCATION_INIT_NODES(target, value),
+        target->name,
+        target->base.location,
+        value,
+        TOK2LOC(parser, operator)
+    );
 }
 
 /**
@@ -3810,7 +3905,7 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
     switch (PM_NODE_TYPE(node)) {
         case PM_BEGIN_NODE: {
             const pm_begin_node_t *cast = (pm_begin_node_t *) node;
-            if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
+            if (cast->statements != NULL) pm_def_node_receiver_check(parser, UP(cast->statements));
             break;
         }
         case PM_PARENTHESES_NODE: {
@@ -3865,65 +3960,45 @@ pm_def_node_create(
     const pm_token_t *equal,
     const pm_token_t *end_keyword
 ) {
-    pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
-    const uint8_t *end;
-
-    if (end_keyword->type == PM_TOKEN_NOT_PROVIDED) {
-        end = body->location.end;
-    } else {
-        end = end_keyword->end;
-    }
-
     if (receiver != NULL) {
         pm_def_node_receiver_check(parser, receiver);
     }
 
-    *node = (pm_def_node_t) {
-        {
-            .type = PM_DEF_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = { .start = def_keyword->start, .end = end },
-        },
-        .name = name,
-        .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
-        .receiver = receiver,
-        .parameters = parameters,
-        .body = body,
-        .locals = *locals,
-        .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
-        .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
-        .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
-        .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
-        .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
-        .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
-    };
-
-    return node;
+    return pm_def_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        (end_keyword == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, def_keyword, body) : PM_LOCATION_INIT_TOKENS(parser, def_keyword, end_keyword),
+        name,
+        TOK2LOC(parser, name_loc),
+        receiver,
+        parameters,
+        body,
+        *locals,
+        TOK2LOC(parser, def_keyword),
+        NTOK2LOC(parser, operator),
+        NTOK2LOC(parser, lparen),
+        NTOK2LOC(parser, rparen),
+        NTOK2LOC(parser, equal),
+        NTOK2LOC(parser, end_keyword)
+    );
 }
 
 /**
  * Allocate a new DefinedNode node.
  */
 static pm_defined_node_t *
-pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_location_t *keyword_loc) {
-    pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
-
-    *node = (pm_defined_node_t) {
-        {
-            .type = PM_DEFINED_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = keyword_loc->start,
-                .end = (rparen->type == PM_TOKEN_NOT_PROVIDED ? value->location.end : rparen->end)
-            },
-        },
-        .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
-        .value = value,
-        .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
-        .keyword_loc = *keyword_loc
-    };
-
-    return node;
+pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_token_t *keyword) {
+    return pm_defined_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        (rparen == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, value) : PM_LOCATION_INIT_TOKENS(parser, keyword, rparen),
+        NTOK2LOC(parser, lparen),
+        value,
+        NTOK2LOC(parser, rparen),
+        TOK2LOC(parser, keyword)
+    );
 }
 
 /**
@@ -3931,29 +4006,15 @@ pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t
  */
 static pm_else_node_t *
 pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
-    pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
-    const uint8_t *end = NULL;
-    if ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
-        end = statements->base.location.end;
-    } else {
-        end = end_keyword->end;
-    }
-
-    *node = (pm_else_node_t) {
-        {
-            .type = PM_ELSE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = else_keyword->start,
-                .end = end,
-            },
-        },
-        .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
-        .statements = statements,
-        .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
-    };
-
-    return node;
+    return pm_else_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        ((end_keyword == NULL) && (statements != NULL)) ? PM_LOCATION_INIT_TOKEN_NODE(parser, else_keyword, statements) : PM_LOCATION_INIT_TOKENS(parser, else_keyword, end_keyword),
+        TOK2LOC(parser, else_keyword),
+        statements,
+        NTOK2LOC(parser, end_keyword)
+    );
 }
 
 /**
@@ -3961,23 +4022,15 @@ pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_stat
  */
 static pm_embedded_statements_node_t *
 pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
-    pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
-
-    *node = (pm_embedded_statements_node_t) {
-        {
-            .type = PM_EMBEDDED_STATEMENTS_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = opening->start,
-                .end = closing->end
-            }
-        },
-        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
-        .statements = statements,
-        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
-    };
-
-    return node;
+    return pm_embedded_statements_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+        TOK2LOC(parser, opening),
+        statements,
+        TOK2LOC(parser, closing)
+    );
 }
 
 /**
@@ -3985,22 +4038,14 @@ pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *openin
  */
 static pm_embedded_variable_node_t *
 pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
-    pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
-
-    *node = (pm_embedded_variable_node_t) {
-        {
-            .type = PM_EMBEDDED_VARIABLE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = operator->start,
-                .end = variable->location.end
-            }
-        },
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .variable = variable
-    };
-
-    return node;
+    return pm_embedded_variable_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable),
+        TOK2LOC(parser, operator),
+        variable
+    );
 }
 
 /**
@@ -4008,23 +4053,15 @@ pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator
  */
 static pm_ensure_node_t *
 pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
-    pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
-
-    *node = (pm_ensure_node_t) {
-        {
-            .type = PM_ENSURE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = ensure_keyword->start,
-                .end = end_keyword->end
-            },
-        },
-        .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
-        .statements = statements,
-        .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
-    };
-
-    return node;
+    return pm_ensure_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, ensure_keyword, end_keyword),
+        TOK2LOC(parser, ensure_keyword),
+        statements,
+        TOK2LOC(parser, end_keyword)
+    );
 }
 
 /**
@@ -4033,16 +4070,13 @@ pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_
 static pm_false_node_t *
 pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_KEYWORD_FALSE);
-    pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
 
-    *node = (pm_false_node_t) {{
-        .type = PM_FALSE_NODE,
-        .flags = PM_NODE_FLAG_STATIC_LITERAL,
-        .node_id = PM_NODE_IDENTIFY(parser),
-        .location = PM_LOCATION_TOKEN_VALUE(token)
-    }};
-
-    return node;
+    return pm_false_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_STATIC_LITERAL,
+        PM_LOCATION_INIT_TOKEN(parser, token)
+    );
 }
 
 /**
@@ -4051,50 +4085,31 @@ pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
  */
 static pm_find_pattern_node_t *
 pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
-    pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
-
+    assert(nodes->size >= 2);
     pm_node_t *left = nodes->nodes[0];
-    assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
-    pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
-
-    pm_node_t *right;
+    pm_node_t *right = nodes->nodes[nodes->size - 1];
 
-    if (nodes->size == 1) {
-        right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end);
-    } else {
-        right = nodes->nodes[nodes->size - 1];
-        assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
-    }
-
-#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
-    // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
-    // The resulting AST will anyway be ignored, but this file still needs to compile.
-    pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
-#else
-    pm_node_t *right_splat_node = right;
-#endif
-    *node = (pm_find_pattern_node_t) {
-        {
-            .type = PM_FIND_PATTERN_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = left->location.start,
-                .end = right->location.end,
-            },
-        },
-        .constant = NULL,
-        .left = left_splat_node,
-        .right = right_splat_node,
-        .requireds = { 0 },
-        .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
-    };
+    assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
+    assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
+
+    pm_find_pattern_node_t *node = pm_find_pattern_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(left, right),
+        NULL,
+        (pm_splat_node_t *) left,
+        ((pm_node_list_t) { 0 }),
+        (pm_splat_node_t *) right,
+        ((pm_location_t) { 0 }),
+        ((pm_location_t) { 0 })
+    );
 
     // For now we're going to just copy over each pointer manually. This could be
     // much more efficient, as we could instead resize the node list to only point
     // to 1...-1.
     for (size_t index = 1; index < nodes->size - 1; index++) {
-        pm_node_list_append(&node->requireds, nodes->nodes[index]);
+        pm_node_list_append(parser->arena, &node->requireds, nodes->nodes[index]);
     }
 
     return node;
@@ -4111,7 +4126,8 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
 
     // First, get a buffer of the content.
     size_t length = (size_t) diff;
-    char *buffer = xmalloc(sizeof(char) * (length + 1));
+    const size_t buffer_size = sizeof(char) * (length + 1);
+    char *buffer = xmalloc(buffer_size);
     memcpy((void *) buffer, token->start, length);
 
     // Next, determine if we need to replace the decimal point because of
@@ -4145,8 +4161,8 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
     // This should never happen, because we've already checked that the token
     // is in a valid format. However it's good to be safe.
     if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
-        PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
-        xfree((void *) buffer);
+        PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, PM_ERR_FLOAT_PARSE);
+        xfree_sized(buffer, buffer_size);
         return 0.0;
     }
 
@@ -4164,12 +4180,12 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
             ellipsis = "";
         }
 
-        pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
+        pm_diagnostic_list_append_format(&parser->metadata_arena, &parser->warning_list, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
         value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
     }
 
     // Finally we can free the buffer and return the value.
-    xfree((void *) buffer);
+    xfree_sized(buffer, buffer_size);
     return value;
 }
 
@@ -4179,19 +4195,14 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
 static pm_float_node_t *
 pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_FLOAT);
-    pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
 
-    *node = (pm_float_node_t) {
-        {
-            .type = PM_FLOAT_NODE,
-            .flags = PM_NODE_FLAG_STATIC_LITERAL,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(token)
-        },
-        .value = pm_double_parse(parser, token)
-    };
-
-    return node;
+    return pm_float_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_STATIC_LITERAL,
+        PM_LOCATION_INIT_TOKEN(parser, token),
+        pm_double_parse(parser, token)
+    );
 }
 
 /**
@@ -4201,22 +4212,17 @@ static pm_imaginary_node_t *
 pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
 
-    pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
-    *node = (pm_imaginary_node_t) {
-        {
-            .type = PM_IMAGINARY_NODE,
-            .flags = PM_NODE_FLAG_STATIC_LITERAL,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(token)
-        },
-        .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
+    return pm_imaginary_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_STATIC_LITERAL,
+        PM_LOCATION_INIT_TOKEN(parser, token),
+        UP(pm_float_node_create(parser, &((pm_token_t) {
             .type = PM_TOKEN_FLOAT,
             .start = token->start,
             .end = token->end - 1
-        }))
-    };
-
-    return node;
+        })))
+    );
 }
 
 /**
@@ -4226,17 +4232,14 @@ static pm_rational_node_t *
 pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
 
-    pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
-    *node = (pm_rational_node_t) {
-        {
-            .type = PM_RATIONAL_NODE,
-            .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(token)
-        },
-        .numerator = { 0 },
-        .denominator = { 0 }
-    };
+    pm_rational_node_t *node = pm_rational_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
+        PM_LOCATION_INIT_TOKEN(parser, token),
+        ((pm_integer_t) { 0 }),
+        ((pm_integer_t) { 0 })
+    );
 
     const uint8_t *start = token->start;
     const uint8_t *end = token->end - 1; // r
@@ -4263,12 +4266,18 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
     memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
     pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
 
+    size_t fract_length = 0;
+    for (const uint8_t *fract = point; fract < end; ++fract) {
+        if (*fract != '_') ++fract_length;
+    }
     digits[0] = '1';
-    if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
-    pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
-    xfree(digits);
+    if (fract_length > 1) memset(digits + 1, '0', fract_length - 1);
+    pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + fract_length);
+    xfree_sized(digits, length);
 
     pm_integers_reduce(&node->numerator, &node->denominator);
+    pm_integer_arena_move(parser->arena, &node->numerator);
+    pm_integer_arena_move(parser->arena, &node->denominator);
     return node;
 }
 
@@ -4280,22 +4289,17 @@ static pm_imaginary_node_t *
 pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
 
-    pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
-    *node = (pm_imaginary_node_t) {
-        {
-            .type = PM_IMAGINARY_NODE,
-            .flags = PM_NODE_FLAG_STATIC_LITERAL,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(token)
-        },
-        .numeric = (pm_node_t *) pm_float_node_rational_create(parser, &((pm_token_t) {
+    return pm_imaginary_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_STATIC_LITERAL,
+        PM_LOCATION_INIT_TOKEN(parser, token),
+        UP(pm_float_node_rational_create(parser, &((pm_token_t) {
             .type = PM_TOKEN_FLOAT_RATIONAL,
             .start = token->start,
             .end = token->end - 1
-        }))
-    };
-
-    return node;
+        })))
+    );
 }
 
 /**
@@ -4312,27 +4316,19 @@ pm_for_node_create(
     const pm_token_t *do_keyword,
     const pm_token_t *end_keyword
 ) {
-    pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
-
-    *node = (pm_for_node_t) {
-        {
-            .type = PM_FOR_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = for_keyword->start,
-                .end = end_keyword->end
-            },
-        },
-        .index = index,
-        .collection = collection,
-        .statements = statements,
-        .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
-        .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
-        .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
-        .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
-    };
-
-    return node;
+    return pm_for_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, for_keyword, end_keyword),
+        index,
+        collection,
+        statements,
+        TOK2LOC(parser, for_keyword),
+        TOK2LOC(parser, in_keyword),
+        NTOK2LOC(parser, do_keyword),
+        TOK2LOC(parser, end_keyword)
+    );
 }
 
 /**
@@ -4341,15 +4337,13 @@ pm_for_node_create(
 static pm_forwarding_arguments_node_t *
 pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
-    pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
-
-    *node = (pm_forwarding_arguments_node_t) {{
-        .type = PM_FORWARDING_ARGUMENTS_NODE,
-        .node_id = PM_NODE_IDENTIFY(parser),
-        .location = PM_LOCATION_TOKEN_VALUE(token)
-    }};
 
-    return node;
+    return pm_forwarding_arguments_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, token)
+    );
 }
 
 /**
@@ -4358,15 +4352,13 @@ pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token
 static pm_forwarding_parameter_node_t *
 pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
-    pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
-
-    *node = (pm_forwarding_parameter_node_t) {{
-        .type = PM_FORWARDING_PARAMETER_NODE,
-        .node_id = PM_NODE_IDENTIFY(parser),
-        .location = PM_LOCATION_TOKEN_VALUE(token)
-    }};
 
-    return node;
+    return pm_forwarding_parameter_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, token)
+    );
 }
 
 /**
@@ -4376,26 +4368,20 @@ static pm_forwarding_super_node_t *
 pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
     assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
     assert(token->type == PM_TOKEN_KEYWORD_SUPER);
-    pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
 
     pm_block_node_t *block = NULL;
     if (arguments->block != NULL) {
         block = (pm_block_node_t *) arguments->block;
     }
 
-    *node = (pm_forwarding_super_node_t) {
-        {
-            .type = PM_FORWARDING_SUPER_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = token->start,
-                .end = block != NULL ? block->base.location.end : token->end
-            },
-        },
-        .block = block
-    };
-
-    return node;
+    return pm_forwarding_super_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        (block == NULL) ? PM_LOCATION_INIT_TOKEN(parser, token) : PM_LOCATION_INIT_TOKEN_NODE(parser, token, block),
+        PM_LOCATION_INIT_TOKEN(parser, token),
+        block
+    );
 }
 
 /**
@@ -4404,25 +4390,17 @@ pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm
  */
 static pm_hash_pattern_node_t *
 pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
-    pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
-
-    *node = (pm_hash_pattern_node_t) {
-        {
-            .type = PM_HASH_PATTERN_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = opening->start,
-                .end = closing->end
-            },
-        },
-        .constant = NULL,
-        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
-        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
-        .elements = { 0 },
-        .rest = NULL
-    };
-
-    return node;
+    return pm_hash_pattern_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+        NULL,
+        ((pm_node_list_t) { 0 }),
+        NULL,
+        TOK2LOC(parser, opening),
+        TOK2LOC(parser, closing)
+    );
 }
 
 /**
@@ -4430,46 +4408,36 @@ pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening
  */
 static pm_hash_pattern_node_t *
 pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
-    pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
-
-    const uint8_t *start;
-    const uint8_t *end;
+    uint32_t start;
+    uint32_t end;
 
     if (elements->size > 0) {
         if (rest) {
-            start = elements->nodes[0]->location.start;
-            end = rest->location.end;
+            start = MIN(PM_NODE_START(rest), PM_NODE_START(elements->nodes[0]));
+            end = MAX(PM_NODE_END(rest), PM_NODE_END(elements->nodes[elements->size - 1]));
         } else {
-            start = elements->nodes[0]->location.start;
-            end = elements->nodes[elements->size - 1]->location.end;
+            start = PM_NODE_START(elements->nodes[0]);
+            end = PM_NODE_END(elements->nodes[elements->size - 1]);
         }
     } else {
         assert(rest != NULL);
-        start = rest->location.start;
-        end = rest->location.end;
-    }
-
-    *node = (pm_hash_pattern_node_t) {
-        {
-            .type = PM_HASH_PATTERN_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = start,
-                .end = end
-            },
-        },
-        .constant = NULL,
-        .elements = { 0 },
-        .rest = rest,
-        .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
-    };
-
-    pm_node_t *element;
-    PM_NODE_LIST_FOREACH(elements, index, element) {
-        pm_node_list_append(&node->elements, element);
-    }
+        start = PM_NODE_START(rest);
+        end = PM_NODE_END(rest);
+    }
+
+    pm_hash_pattern_node_t *node = pm_hash_pattern_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        ((pm_location_t) { .start = start, .length = U32(end - start) }),
+        NULL,
+        ((pm_node_list_t) { 0 }),
+        rest,
+        ((pm_location_t) { 0 }),
+        ((pm_location_t) { 0 })
+    );
 
+    pm_node_list_concat(parser->arena, &node->elements, elements);
     return node;
 }
 
@@ -4486,7 +4454,7 @@ pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
         case PM_NUMBERED_REFERENCE_READ_NODE:
             // This will only ever happen in the event of a syntax error, but we
             // still need to provide something for the node.
-            return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
+            return pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
         default:
             assert(false && "unreachable");
             return (pm_constant_id_t) -1;
@@ -4499,24 +4467,17 @@ pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
 static pm_global_variable_and_write_node_t *
 pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
     assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
-    pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
-
-    *node = (pm_global_variable_and_write_node_t) {
-        {
-            .type = PM_GLOBAL_VARIABLE_AND_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->location.start,
-                .end = value->location.end
-            }
-        },
-        .name = pm_global_variable_write_name(parser, target),
-        .name_loc = target->location,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
 
-    return node;
+    return pm_global_variable_and_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(target, value),
+        pm_global_variable_write_name(parser, target),
+        target->location,
+        TOK2LOC(parser, operator),
+        value
+    );
 }
 
 /**
@@ -4524,25 +4485,17 @@ pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target,
  */
 static pm_global_variable_operator_write_node_t *
 pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
-    pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
-
-    *node = (pm_global_variable_operator_write_node_t) {
-        {
-            .type = PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->location.start,
-                .end = value->location.end
-            }
-        },
-        .name = pm_global_variable_write_name(parser, target),
-        .name_loc = target->location,
-        .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value,
-        .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
-    };
-
-    return node;
+    return pm_global_variable_operator_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(target, value),
+        pm_global_variable_write_name(parser, target),
+        target->location,
+        TOK2LOC(parser, operator),
+        value,
+        pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
+    );
 }
 
 /**
@@ -4551,24 +4504,17 @@ pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *ta
 static pm_global_variable_or_write_node_t *
 pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
     assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
-    pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
 
-    *node = (pm_global_variable_or_write_node_t) {
-        {
-            .type = PM_GLOBAL_VARIABLE_OR_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->location.start,
-                .end = value->location.end
-            }
-        },
-        .name = pm_global_variable_write_name(parser, target),
-        .name_loc = target->location,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
-
-    return node;
+    return pm_global_variable_or_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(target, value),
+        pm_global_variable_write_name(parser, target),
+        target->location,
+        TOK2LOC(parser, operator),
+        value
+    );
 }
 
 /**
@@ -4576,18 +4522,13 @@ pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target,
  */
 static pm_global_variable_read_node_t *
 pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
-    pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
-
-    *node = (pm_global_variable_read_node_t) {
-        {
-            .type = PM_GLOBAL_VARIABLE_READ_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(name),
-        },
-        .name = pm_parser_constant_id_token(parser, name)
-    };
-
-    return node;
+    return pm_global_variable_read_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, name),
+        pm_parser_constant_id_token(parser, name)
+    );
 }
 
 /**
@@ -4595,18 +4536,13 @@ pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name)
  */
 static pm_global_variable_read_node_t *
 pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
-    pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
-
-    *node = (pm_global_variable_read_node_t) {
-        {
-            .type = PM_GLOBAL_VARIABLE_READ_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_NULL_VALUE(parser)
-        },
-        .name = name
-    };
-
-    return node;
+    return pm_global_variable_read_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_UNSET,
+        name
+    );
 }
 
 /**
@@ -4614,25 +4550,16 @@ pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant
  */
 static pm_global_variable_write_node_t *
 pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
-    pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
-
-    *node = (pm_global_variable_write_node_t) {
-        {
-            .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
-            .location = {
-                .start = target->location.start,
-                .end = value->location.end
-            },
-        },
-        .name = pm_global_variable_write_name(parser, target),
-        .name_loc = PM_LOCATION_NODE_VALUE(target),
-        .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
-
-    return node;
+    return pm_global_variable_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+        PM_LOCATION_INIT_NODES(target, value),
+        pm_global_variable_write_name(parser, target),
+        target->location,
+        value,
+        TOK2LOC(parser, operator)
+    );
 }
 
 /**
@@ -4640,21 +4567,16 @@ pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, con
  */
 static pm_global_variable_write_node_t *
 pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
-    pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
-
-    *node = (pm_global_variable_write_node_t) {
-        {
-            .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_NULL_VALUE(parser)
-        },
-        .name = name,
-        .name_loc = PM_LOCATION_NULL_VALUE(parser),
-        .operator_loc = PM_LOCATION_NULL_VALUE(parser),
-        .value = value
-    };
-
-    return node;
+    return pm_global_variable_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_UNSET,
+        name,
+        ((pm_location_t) { 0 }),
+        value,
+        ((pm_location_t) { 0 })
+    );
 }
 
 /**
@@ -4663,29 +4585,24 @@ pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constan
 static pm_hash_node_t *
 pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
     assert(opening != NULL);
-    pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
-
-    *node = (pm_hash_node_t) {
-        {
-            .type = PM_HASH_NODE,
-            .flags = PM_NODE_FLAG_STATIC_LITERAL,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(opening)
-        },
-        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
-        .closing_loc = PM_LOCATION_NULL_VALUE(parser),
-        .elements = { 0 }
-    };
 
-    return node;
+    return pm_hash_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_STATIC_LITERAL,
+        PM_LOCATION_INIT_TOKEN(parser, opening),
+        TOK2LOC(parser, opening),
+        ((pm_node_list_t) { 0 }),
+        ((pm_location_t) { 0 })
+    );
 }
 
 /**
  * Append a new element to a hash node.
  */
-static inline void
-pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
-    pm_node_list_append(&hash->elements, element);
+static PRISM_INLINE void
+pm_hash_node_elements_append(pm_arena_t *arena, pm_hash_node_t *hash, pm_node_t *element) {
+    pm_node_list_append(arena, &hash->elements, element);
 
     bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
     if (static_literal) {
@@ -4696,14 +4613,14 @@ pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
     }
 
     if (!static_literal) {
-        pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
+        pm_node_flag_unset(UP(hash), PM_NODE_FLAG_STATIC_LITERAL);
     }
 }
 
-static inline void
-pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
-    hash->base.location.end = token->end;
-    hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
+static PRISM_INLINE void
+pm_hash_node_closing_loc_set(const pm_parser_t *parser, pm_hash_node_t *hash, pm_token_t *token) {
+    PM_NODE_LENGTH_SET_TOKEN(parser, hash, token);
+    hash->closing_loc = TOK2LOC(parser, token);
 }
 
 /**
@@ -4719,38 +4636,32 @@ pm_if_node_create(pm_parser_t *parser,
     const pm_token_t *end_keyword
 ) {
     pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
-    pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
 
-    const uint8_t *end;
-    if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
-        end = end_keyword->end;
+    uint32_t start = PM_TOKEN_START(parser, if_keyword);
+    uint32_t end;
+
+    if (end_keyword != NULL) {
+        end = PM_TOKEN_END(parser, end_keyword);
     } else if (subsequent != NULL) {
-        end = subsequent->location.end;
+        end = PM_NODE_END(subsequent);
     } else if (pm_statements_node_body_length(statements) != 0) {
-        end = statements->base.location.end;
+        end = PM_NODE_END(statements);
     } else {
-        end = predicate->location.end;
-    }
-
-    *node = (pm_if_node_t) {
-        {
-            .type = PM_IF_NODE,
-            .flags = PM_NODE_FLAG_NEWLINE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = if_keyword->start,
-                .end = end
-            },
-        },
-        .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
-        .predicate = predicate,
-        .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
-        .statements = statements,
-        .subsequent = subsequent,
-        .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
-    };
-
-    return node;
+        end = PM_NODE_END(predicate);
+    }
+
+    return pm_if_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_NEWLINE,
+        ((pm_location_t) { .start = start, .length = U32(end - start) }),
+        TOK2LOC(parser, if_keyword),
+        predicate,
+        NTOK2LOC(parser, then_keyword),
+        statements,
+        subsequent,
+        NTOK2LOC(parser, end_keyword)
+    );
 }
 
 /**
@@ -4759,30 +4670,22 @@ pm_if_node_create(pm_parser_t *parser,
 static pm_if_node_t *
 pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
     pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
-    pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
 
     pm_statements_node_t *statements = pm_statements_node_create(parser);
     pm_statements_node_body_append(parser, statements, statement, true);
 
-    *node = (pm_if_node_t) {
-        {
-            .type = PM_IF_NODE,
-            .flags = PM_NODE_FLAG_NEWLINE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = statement->location.start,
-                .end = predicate->location.end
-            },
-        },
-        .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
-        .predicate = predicate,
-        .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .statements = statements,
-        .subsequent = NULL,
-        .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
-    };
-
-    return node;
+    return pm_if_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_NEWLINE,
+        PM_LOCATION_INIT_NODES(statement, predicate),
+        TOK2LOC(parser, if_keyword),
+        predicate,
+        ((pm_location_t) { 0 }),
+        statements,
+        NULL,
+        ((pm_location_t) { 0 })
+    );
 }
 
 /**
@@ -4799,43 +4702,31 @@ pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_to
     pm_statements_node_t *else_statements = pm_statements_node_create(parser);
     pm_statements_node_body_append(parser, else_statements, false_expression, true);
 
-    pm_token_t end_keyword = not_provided(parser);
-    pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
-
-    pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
-
-    *node = (pm_if_node_t) {
-        {
-            .type = PM_IF_NODE,
-            .flags = PM_NODE_FLAG_NEWLINE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = predicate->location.start,
-                .end = false_expression->location.end,
-            },
-        },
-        .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .predicate = predicate,
-        .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
-        .statements = if_statements,
-        .subsequent = (pm_node_t *) else_node,
-        .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
-    };
-
-    return node;
-
+    pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, NULL);
+    return pm_if_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_NEWLINE,
+        PM_LOCATION_INIT_NODES(predicate, false_expression),
+        ((pm_location_t) { 0 }),
+        predicate,
+        TOK2LOC(parser, qmark),
+        if_statements,
+        UP(else_node),
+        ((pm_location_t) { 0 })
+    );
 }
 
-static inline void
-pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
-    node->base.location.end = keyword->end;
-    node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
+static PRISM_INLINE void
+pm_if_node_end_keyword_loc_set(const pm_parser_t *parser, pm_if_node_t *node, const pm_token_t *keyword) {
+    PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
+    node->end_keyword_loc = TOK2LOC(parser, keyword);
 }
 
-static inline void
-pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
-    node->base.location.end = keyword->end;
-    node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
+static PRISM_INLINE void
+pm_else_node_end_keyword_loc_set(const pm_parser_t *parser, pm_else_node_t *node, const pm_token_t *keyword) {
+    PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
+    node->end_keyword_loc = TOK2LOC(parser, keyword);
 }
 
 /**
@@ -4843,18 +4734,13 @@ pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword
  */
 static pm_implicit_node_t *
 pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
-    pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
-
-    *node = (pm_implicit_node_t) {
-        {
-            .type = PM_IMPLICIT_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = value->location
-        },
-        .value = value
-    };
-
-    return node;
+    return pm_implicit_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODE(value),
+        value
+    );
 }
 
 /**
@@ -4864,17 +4750,12 @@ static pm_implicit_rest_node_t *
 pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_COMMA);
 
-    pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
-
-    *node = (pm_implicit_rest_node_t) {
-        {
-            .type = PM_IMPLICIT_REST_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(token)
-        }
-    };
-
-    return node;
+    return pm_implicit_rest_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, token)
+    );
 }
 
 /**
@@ -4883,28 +4764,33 @@ pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
 static pm_integer_node_t *
 pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_INTEGER);
-    pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
 
-    *node = (pm_integer_node_t) {
-        {
-            .type = PM_INTEGER_NODE,
-            .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(token)
-        },
-        .value = { 0 }
-    };
+    pm_integer_node_t *node = pm_integer_node_new(
+        parser->arena,
+        ++parser->node_id,
+        base | PM_NODE_FLAG_STATIC_LITERAL,
+        PM_LOCATION_INIT_TOKEN(parser, token),
+        ((pm_integer_t) { 0 })
+    );
 
-    pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
-    switch (base) {
-        case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
-        case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
-        case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
-        case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
-        default: assert(false && "unreachable"); break;
+    if (parser->integer.lexed) {
+        // The value was already computed during lexing.
+        node->value.value = parser->integer.value;
+        parser->integer.lexed = false;
+    } else {
+        pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
+        switch (base) {
+            case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
+            case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
+            case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
+            case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
+            default: assert(false && "unreachable"); break;
+        }
+
+        pm_integer_parse(&node->value, integer_base, token->start, token->end);
+        pm_integer_arena_move(parser->arena, &node->value);
     }
 
-    pm_integer_parse(&node->value, integer_base, token->start, token->end);
     return node;
 }
 
@@ -4916,22 +4802,17 @@ static pm_imaginary_node_t *
 pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
 
-    pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
-    *node = (pm_imaginary_node_t) {
-        {
-            .type = PM_IMAGINARY_NODE,
-            .flags = PM_NODE_FLAG_STATIC_LITERAL,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(token)
-        },
-        .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
+    return pm_imaginary_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_STATIC_LITERAL,
+        PM_LOCATION_INIT_TOKEN(parser, token),
+        UP(pm_integer_node_create(parser, base, &((pm_token_t) {
             .type = PM_TOKEN_INTEGER,
             .start = token->start,
             .end = token->end - 1
-        }))
-    };
-
-    return node;
+        })))
+    );
 }
 
 /**
@@ -4942,17 +4823,14 @@ static pm_rational_node_t *
 pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
 
-    pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
-    *node = (pm_rational_node_t) {
-        {
-            .type = PM_RATIONAL_NODE,
-            .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(token)
-        },
-        .numerator = { 0 },
-        .denominator = { .value = 1, 0 }
-    };
+    pm_rational_node_t *node = pm_rational_node_new(
+        parser->arena,
+        ++parser->node_id,
+        base | PM_NODE_FLAG_STATIC_LITERAL,
+        PM_LOCATION_INIT_TOKEN(parser, token),
+        ((pm_integer_t) { 0 }),
+        ((pm_integer_t) { .value = 1 })
+    );
 
     pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
     switch (base) {
@@ -4964,6 +4842,7 @@ pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const
     }
 
     pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
+    pm_integer_arena_move(parser->arena, &node->numerator);
 
     return node;
 }
@@ -4976,22 +4855,17 @@ static pm_imaginary_node_t *
 pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
 
-    pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
-    *node = (pm_imaginary_node_t) {
-        {
-            .type = PM_IMAGINARY_NODE,
-            .flags = PM_NODE_FLAG_STATIC_LITERAL,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(token)
-        },
-        .numeric = (pm_node_t *) pm_integer_node_rational_create(parser, base, &((pm_token_t) {
+    return pm_imaginary_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_STATIC_LITERAL,
+        PM_LOCATION_INIT_TOKEN(parser, token),
+        UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) {
             .type = PM_TOKEN_INTEGER_RATIONAL,
             .start = token->start,
             .end = token->end - 1
-        }))
-    };
-
-    return node;
+        })))
+    );
 }
 
 /**
@@ -4999,33 +4873,27 @@ pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t b
  */
 static pm_in_node_t *
 pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
-    pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
+    uint32_t start = PM_TOKEN_START(parser, in_keyword);
+    uint32_t end;
 
-    const uint8_t *end;
     if (statements != NULL) {
-        end = statements->base.location.end;
-    } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
-        end = then_keyword->end;
+        end = PM_NODE_END(statements);
+    } else if (then_keyword != NULL) {
+        end = PM_TOKEN_END(parser, then_keyword);
     } else {
-        end = pattern->location.end;
-    }
-
-    *node = (pm_in_node_t) {
-        {
-            .type = PM_IN_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = in_keyword->start,
-                .end = end
-            },
-        },
-        .pattern = pattern,
-        .statements = statements,
-        .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
-        .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
-    };
-
-    return node;
+        end = PM_NODE_END(pattern);
+    }
+
+    return pm_in_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        ((pm_location_t) { .start = start, .length = U32(end - start) }),
+        pattern,
+        statements,
+        TOK2LOC(parser, in_keyword),
+        NTOK2LOC(parser, then_keyword)
+    );
 }
 
 /**
@@ -5034,24 +4902,17 @@ pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t
 static pm_instance_variable_and_write_node_t *
 pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
     assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
-    pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
 
-    *node = (pm_instance_variable_and_write_node_t) {
-        {
-            .type = PM_INSTANCE_VARIABLE_AND_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .name = target->name,
-        .name_loc = target->base.location,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
-
-    return node;
+    return pm_instance_variable_and_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(target, value),
+        target->name,
+        target->base.location,
+        TOK2LOC(parser, operator),
+        value
+    );
 }
 
 /**
@@ -5059,25 +4920,17 @@ pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_vari
  */
 static pm_instance_variable_operator_write_node_t *
 pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
-    pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
-
-    *node = (pm_instance_variable_operator_write_node_t) {
-        {
-            .type = PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .name = target->name,
-        .name_loc = target->base.location,
-        .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value,
-        .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
-    };
-
-    return node;
+    return pm_instance_variable_operator_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(target, value),
+        target->name,
+        target->base.location,
+        TOK2LOC(parser, operator),
+        value,
+        pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
+    );
 }
 
 /**
@@ -5086,24 +4939,17 @@ pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance
 static pm_instance_variable_or_write_node_t *
 pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
     assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
-    pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
-
-    *node = (pm_instance_variable_or_write_node_t) {
-        {
-            .type = PM_INSTANCE_VARIABLE_OR_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .name = target->name,
-        .name_loc = target->base.location,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
 
-    return node;
+    return pm_instance_variable_or_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(target, value),
+        target->name,
+        target->base.location,
+        TOK2LOC(parser, operator),
+        value
+    );
 }
 
 /**
@@ -5112,18 +4958,14 @@ pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_varia
 static pm_instance_variable_read_node_t *
 pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
-    pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
-
-    *node = (pm_instance_variable_read_node_t) {
-        {
-            .type = PM_INSTANCE_VARIABLE_READ_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(token)
-        },
-        .name = pm_parser_constant_id_token(parser, token)
-    };
 
-    return node;
+    return pm_instance_variable_read_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, token),
+        pm_parser_constant_id_token(parser, token)
+    );
 }
 
 /**
@@ -5132,24 +4974,16 @@ pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *tok
  */
 static pm_instance_variable_write_node_t *
 pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
-    pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
-    *node = (pm_instance_variable_write_node_t) {
-        {
-            .type = PM_INSTANCE_VARIABLE_WRITE_NODE,
-            .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = read_node->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .name = read_node->name,
-        .name_loc = PM_LOCATION_NODE_BASE_VALUE(read_node),
-        .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
-
-    return node;
+    return pm_instance_variable_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+        PM_LOCATION_INIT_NODES(read_node, value),
+        read_node->name,
+        read_node->base.location,
+        value,
+        TOK2LOC(parser, operator)
+    );
 }
 
 /**
@@ -5158,7 +4992,7 @@ pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable
  * literals.
  */
 static void
-pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
+pm_interpolated_node_append(pm_arena_t *arena, pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
     switch (PM_NODE_TYPE(part)) {
         case PM_STRING_NODE:
             pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
@@ -5186,14 +5020,14 @@ pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *p
             break;
         }
         case PM_EMBEDDED_VARIABLE_NODE:
-            pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
+            pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
             break;
         default:
             assert(false && "unexpected node type");
             break;
     }
 
-    pm_node_list_append(parts, part);
+    pm_node_list_append(arena, parts, part);
 }
 
 /**
@@ -5201,43 +5035,34 @@ pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *p
  */
 static pm_interpolated_regular_expression_node_t *
 pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
-    pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
-
-    *node = (pm_interpolated_regular_expression_node_t) {
-        {
-            .type = PM_INTERPOLATED_REGULAR_EXPRESSION_NODE,
-            .flags = PM_NODE_FLAG_STATIC_LITERAL,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = opening->start,
-                .end = NULL,
-            },
-        },
-        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
-        .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
-        .parts = { 0 }
-    };
-
-    return node;
+    return pm_interpolated_regular_expression_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_STATIC_LITERAL,
+        PM_LOCATION_INIT_TOKEN(parser, opening),
+        TOK2LOC(parser, opening),
+        ((pm_node_list_t) { 0 }),
+        TOK2LOC(parser, opening)
+    );
 }
 
-static inline void
-pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
-    if (node->base.location.start > part->location.start) {
-        node->base.location.start = part->location.start;
+static PRISM_INLINE void
+pm_interpolated_regular_expression_node_append(pm_arena_t *arena, pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
+    if (PM_NODE_START(node) > PM_NODE_START(part)) {
+        PM_NODE_START_SET_NODE(node, part);
     }
-    if (node->base.location.end < part->location.end) {
-        node->base.location.end = part->location.end;
+    if (PM_NODE_END(node) < PM_NODE_END(part)) {
+        PM_NODE_LENGTH_SET_NODE(node, part);
     }
 
-    pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
+    pm_interpolated_node_append(arena, UP(node), &node->parts, part);
 }
 
-static inline void
+static PRISM_INLINE void
 pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
-    node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
-    node->base.location.end = closing->end;
-    pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
+    node->closing_loc = TOK2LOC(parser, closing);
+    PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
+    pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing));
 }
 
 /**
@@ -5249,7 +5074,7 @@ pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_inte
  * PM_NODE_FLAG_STATIC_LITERAL indicates that the node should be treated as a
  * single static literal string that can be pushed onto the stack on its own.
  * Note that this doesn't necessarily mean that the string will be frozen or
- * not; the instructions in CRuby will be either putobject or putstring,
+ * not; the instructions in CRuby will be either putobject, dupstring or dupchilledstring,
  * depending on the combination of `--enable-frozen-string-literal`,
  * `# frozen_string_literal: true`, and whether or not there is interpolation.
  *
@@ -5263,22 +5088,31 @@ pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_inte
  * is necessary to indicate that the string should be left up to the runtime,
  * which could potentially use a chilled string otherwise.
  */
-static inline void
-pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
+static PRISM_INLINE void
+pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_node_t *node, pm_node_t *part) {
+    pm_arena_t *arena = parser->arena;
 #define CLEAR_FLAGS(node) \
-    node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
+    node->base.flags = (pm_node_flags_t) (FL(node) & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
 
 #define MUTABLE_FLAGS(node) \
-    node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
+    node->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
 
-    if (node->parts.size == 0 && node->opening_loc.start == NULL) {
-        node->base.location.start = part->location.start;
+    if (node->parts.size == 0 && node->opening_loc.length == 0) {
+        PM_NODE_START_SET_NODE(node, part);
     }
 
-    node->base.location.end = MAX(node->base.location.end, part->location.end);
+    if (PM_NODE_END(part) > PM_NODE_END(node)) {
+        PM_NODE_LENGTH_SET_NODE(node, part);
+    }
 
     switch (PM_NODE_TYPE(part)) {
         case PM_STRING_NODE:
+            // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
+            // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
+            // as long as this interpolation only consists of other string literals.
+            if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
+                pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
+            }
             part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
             break;
         case PM_INTERPOLATED_STRING_NODE:
@@ -5330,8 +5164,14 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_
             break;
         case PM_X_STRING_NODE:
         case PM_INTERPOLATED_X_STRING_NODE:
-            // If this is an x string, then this is a syntax error. But we want
-            // to handle it here so that we don't fail the assertion.
+        case PM_SYMBOL_NODE:
+        case PM_INTERPOLATED_SYMBOL_NODE:
+            // These will only happen in error cases. But we want to handle it
+            // here so that we don't fail the assertion.
+            CLEAR_FLAGS(node);
+            pm_node_list_append(arena, &node->parts, UP(pm_error_recovery_node_create_unexpected(parser, part)));
+            return;
+        case PM_ERROR_RECOVERY_NODE:
             CLEAR_FLAGS(node);
             break;
         default:
@@ -5339,7 +5179,7 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_
             break;
     }
 
-    pm_node_list_append(&node->parts, part);
+    pm_node_list_append(arena, &node->parts, part);
 
 #undef CLEAR_FLAGS
 #undef MUTABLE_FLAGS
@@ -5350,7 +5190,6 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_
  */
 static pm_interpolated_string_node_t *
 pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
-    pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
     pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
 
     switch (parser->frozen_string_literal) {
@@ -5362,25 +5201,23 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin
             break;
     }
 
-    *node = (pm_interpolated_string_node_t) {
-        {
-            .type = PM_INTERPOLATED_STRING_NODE,
-            .flags = flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = opening->start,
-                .end = closing->end,
-            },
-        },
-        .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
-        .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
-        .parts = { 0 }
-    };
+    uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
+    uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
+
+    pm_interpolated_string_node_t *node = pm_interpolated_string_node_new(
+        parser->arena,
+        ++parser->node_id,
+        flags,
+        ((pm_location_t) { .start = start, .length = U32(end - start) }),
+        NTOK2LOC(parser, opening),
+        ((pm_node_list_t) { 0 }),
+        NTOK2LOC(parser, closing)
+    );
 
     if (parts != NULL) {
         pm_node_t *part;
         PM_NODE_LIST_FOREACH(parts, index, part) {
-            pm_interpolated_string_node_append(node, part);
+            pm_interpolated_string_node_append(parser, node, part);
         }
     }
 
@@ -5391,25 +5228,28 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin
  * Set the closing token of the given InterpolatedStringNode node.
  */
 static void
-pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
-    node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
-    node->base.location.end = closing->end;
+pm_interpolated_string_node_closing_set(const pm_parser_t *parser, pm_interpolated_string_node_t *node, const pm_token_t *closing) {
+    node->closing_loc = TOK2LOC(parser, closing);
+    PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
 }
 
 static void
-pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
-    if (node->parts.size == 0 && node->opening_loc.start == NULL) {
-        node->base.location.start = part->location.start;
+pm_interpolated_symbol_node_append(pm_arena_t *arena, pm_interpolated_symbol_node_t *node, pm_node_t *part) {
+    if (node->parts.size == 0 && node->opening_loc.length == 0) {
+        PM_NODE_START_SET_NODE(node, part);
     }
 
-    pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
-    node->base.location.end = MAX(node->base.location.end, part->location.end);
+    pm_interpolated_node_append(arena, UP(node), &node->parts, part);
+
+    if (PM_NODE_END(part) > PM_NODE_END(node)) {
+        PM_NODE_LENGTH_SET_NODE(node, part);
+    }
 }
 
 static void
-pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
-    node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
-    node->base.location.end = closing->end;
+pm_interpolated_symbol_node_closing_loc_set(const pm_parser_t *parser, pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
+    node->closing_loc = TOK2LOC(parser, closing);
+    PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
 }
 
 /**
@@ -5417,27 +5257,23 @@ pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node,
  */
 static pm_interpolated_symbol_node_t *
 pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
-    pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
-
-    *node = (pm_interpolated_symbol_node_t) {
-        {
-            .type = PM_INTERPOLATED_SYMBOL_NODE,
-            .flags = PM_NODE_FLAG_STATIC_LITERAL,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = opening->start,
-                .end = closing->end,
-            },
-        },
-        .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
-        .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
-        .parts = { 0 }
-    };
+    uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
+    uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
+
+    pm_interpolated_symbol_node_t *node = pm_interpolated_symbol_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_STATIC_LITERAL,
+        ((pm_location_t) { .start = start, .length = U32(end - start) }),
+        NTOK2LOC(parser, opening),
+        ((pm_node_list_t) { 0 }),
+        NTOK2LOC(parser, closing)
+    );
 
     if (parts != NULL) {
         pm_node_t *part;
         PM_NODE_LIST_FOREACH(parts, index, part) {
-            pm_interpolated_symbol_node_append(node, part);
+            pm_interpolated_symbol_node_append(parser->arena, node, part);
         }
     }
 
@@ -5449,35 +5285,27 @@ pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *openin
  */
 static pm_interpolated_x_string_node_t *
 pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
-    pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
-
-    *node = (pm_interpolated_x_string_node_t) {
-        {
-            .type = PM_INTERPOLATED_X_STRING_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = opening->start,
-                .end = closing->end
-            },
-        },
-        .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
-        .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
-        .parts = { 0 }
-    };
-
-    return node;
+    return pm_interpolated_x_string_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+        TOK2LOC(parser, opening),
+        ((pm_node_list_t) { 0 }),
+        TOK2LOC(parser, closing)
+    );
 }
 
-static inline void
-pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
-    pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
-    node->base.location.end = part->location.end;
+static PRISM_INLINE void
+pm_interpolated_xstring_node_append(pm_arena_t *arena, pm_interpolated_x_string_node_t *node, pm_node_t *part) {
+    pm_interpolated_node_append(arena, UP(node), &node->parts, part);
+    PM_NODE_LENGTH_SET_NODE(node, part);
 }
 
-static inline void
-pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
-    node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
-    node->base.location.end = closing->end;
+static PRISM_INLINE void
+pm_interpolated_xstring_node_closing_set(const pm_parser_t *parser, pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
+    node->closing_loc = TOK2LOC(parser, closing);
+    PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
 }
 
 /**
@@ -5485,17 +5313,12 @@ pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node,
  */
 static pm_it_local_variable_read_node_t *
 pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
-    pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
-
-    *node = (pm_it_local_variable_read_node_t) {
-        {
-            .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(name)
-        }
-    };
-
-    return node;
+    return pm_it_local_variable_read_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, name)
+    );
 }
 
 /**
@@ -5503,20 +5326,12 @@ pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *nam
  */
 static pm_it_parameters_node_t *
 pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
-    pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
-
-    *node = (pm_it_parameters_node_t) {
-        {
-            .type = PM_IT_PARAMETERS_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = opening->start,
-                .end = closing->end
-            }
-        }
-    };
-
-    return node;
+    return pm_it_parameters_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, opening, closing)
+    );
 }
 
 /**
@@ -5524,37 +5339,31 @@ pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, con
  */
 static pm_keyword_hash_node_t *
 pm_keyword_hash_node_create(pm_parser_t *parser) {
-    pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
-
-    *node = (pm_keyword_hash_node_t) {
-        .base = {
-            .type = PM_KEYWORD_HASH_NODE,
-            .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
-        },
-        .elements = { 0 }
-    };
-
-    return node;
+    return pm_keyword_hash_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
+        PM_LOCATION_INIT_UNSET,
+        ((pm_node_list_t) { 0 })
+    );
 }
 
 /**
  * Append an element to a KeywordHashNode node.
  */
 static void
-pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
+pm_keyword_hash_node_elements_append(pm_arena_t *arena, pm_keyword_hash_node_t *hash, pm_node_t *element) {
     // If the element being added is not an AssocNode or does not have a symbol
     // key, then we want to turn the SYMBOL_KEYS flag off.
     if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
-        pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
+        pm_node_flag_unset(UP(hash), PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
     }
 
-    pm_node_list_append(&hash->elements, element);
-    if (hash->base.location.start == NULL) {
-        hash->base.location.start = element->location.start;
+    pm_node_list_append(arena, &hash->elements, element);
+    if (PM_NODE_LENGTH(hash) == 0) {
+        PM_NODE_START_SET_NODE(hash, element);
     }
-    hash->base.location.end = element->location.end;
+    PM_NODE_LENGTH_SET_NODE(hash, element);
 }
 
 /**
@@ -5562,22 +5371,14 @@ pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *el
  */
 static pm_required_keyword_parameter_node_t *
 pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
-    pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
-
-    *node = (pm_required_keyword_parameter_node_t) {
-        {
-            .type = PM_REQUIRED_KEYWORD_PARAMETER_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = name->start,
-                .end = name->end
-            },
-        },
-        .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
-        .name_loc = PM_LOCATION_TOKEN_VALUE(name),
-    };
-
-    return node;
+    return pm_required_keyword_parameter_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, name),
+        pm_parser_constant_id_raw(parser, name->start, name->end - 1),
+        TOK2LOC(parser, name)
+    );
 }
 
 /**
@@ -5585,23 +5386,15 @@ pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t
  */
 static pm_optional_keyword_parameter_node_t *
 pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
-    pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
-
-    *node = (pm_optional_keyword_parameter_node_t) {
-        {
-            .type = PM_OPTIONAL_KEYWORD_PARAMETER_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = name->start,
-                .end = value->location.end
-            },
-        },
-        .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
-        .name_loc = PM_LOCATION_TOKEN_VALUE(name),
-        .value = value
-    };
-
-    return node;
+    return pm_optional_keyword_parameter_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN_NODE(parser, name, value),
+        pm_parser_constant_id_raw(parser, name->start, name->end - 1),
+        TOK2LOC(parser, name),
+        value
+    );
 }
 
 /**
@@ -5609,23 +5402,15 @@ pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t
  */
 static pm_keyword_rest_parameter_node_t *
 pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
-    pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
-
-    *node = (pm_keyword_rest_parameter_node_t) {
-        {
-            .type = PM_KEYWORD_REST_PARAMETER_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = operator->start,
-                .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
-            },
-        },
-        .name = pm_parser_optional_constant_id_token(parser, name),
-        .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
-    };
-
-    return node;
+    return pm_keyword_rest_parameter_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
+        name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
+        NTOK2LOC(parser, name),
+        TOK2LOC(parser, operator)
+    );
 }
 
 /**
@@ -5641,26 +5426,18 @@ pm_lambda_node_create(
     pm_node_t *parameters,
     pm_node_t *body
 ) {
-    pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
-
-    *node = (pm_lambda_node_t) {
-        {
-            .type = PM_LAMBDA_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = operator->start,
-                .end = closing->end
-            },
-        },
-        .locals = *locals,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
-        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
-        .parameters = parameters,
-        .body = body
-    };
-
-    return node;
+    return pm_lambda_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, operator, closing),
+        *locals,
+        TOK2LOC(parser, operator),
+        TOK2LOC(parser, opening),
+        TOK2LOC(parser, closing),
+        parameters,
+        body
+    );
 }
 
 /**
@@ -5670,25 +5447,18 @@ static pm_local_variable_and_write_node_t *
 pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
     assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
     assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
-    pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
-
-    *node = (pm_local_variable_and_write_node_t) {
-        {
-            .type = PM_LOCAL_VARIABLE_AND_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->location.start,
-                .end = value->location.end
-            }
-        },
-        .name_loc = target->location,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value,
-        .name = name,
-        .depth = depth
-    };
 
-    return node;
+    return pm_local_variable_and_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(target, value),
+        target->location,
+        TOK2LOC(parser, operator),
+        value,
+        name,
+        depth
+    );
 }
 
 /**
@@ -5696,26 +5466,18 @@ pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target,
  */
 static pm_local_variable_operator_write_node_t *
 pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
-    pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
-
-    *node = (pm_local_variable_operator_write_node_t) {
-        {
-            .type = PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->location.start,
-                .end = value->location.end
-            }
-        },
-        .name_loc = target->location,
-        .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value,
-        .name = name,
-        .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
-        .depth = depth
-    };
-
-    return node;
+    return pm_local_variable_operator_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(target, value),
+        target->location,
+        TOK2LOC(parser, operator),
+        value,
+        name,
+        pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
+        depth
+    );
 }
 
 /**
@@ -5725,25 +5487,18 @@ static pm_local_variable_or_write_node_t *
 pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
     assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
     assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
-    pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
 
-    *node = (pm_local_variable_or_write_node_t) {
-        {
-            .type = PM_LOCAL_VARIABLE_OR_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->location.start,
-                .end = value->location.end
-            }
-        },
-        .name_loc = target->location,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value,
-        .name = name,
-        .depth = depth
-    };
-
-    return node;
+    return pm_local_variable_or_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(target, value),
+        target->location,
+        TOK2LOC(parser, operator),
+        value,
+        name,
+        depth
+    );
 }
 
 /**
@@ -5753,19 +5508,14 @@ static pm_local_variable_read_node_t *
 pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
     if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
 
-    pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
-
-    *node = (pm_local_variable_read_node_t) {
-        {
-            .type = PM_LOCAL_VARIABLE_READ_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(name)
-        },
-        .name = name_id,
-        .depth = depth
-    };
-
-    return node;
+    return pm_local_variable_read_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, name),
+        name_id,
+        depth
+    );
 }
 
 /**
@@ -5792,32 +5542,23 @@ pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t
  */
 static pm_local_variable_write_node_t *
 pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
-    pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
-
-    *node = (pm_local_variable_write_node_t) {
-        {
-            .type = PM_LOCAL_VARIABLE_WRITE_NODE,
-            .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = name_loc->start,
-                .end = value->location.end
-            }
-        },
-        .name = name,
-        .depth = depth,
-        .value = value,
-        .name_loc = *name_loc,
-        .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
-    };
-
-    return node;
+    return pm_local_variable_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+        ((pm_location_t) { .start = name_loc->start, .length = PM_NODE_END(value) - name_loc->start }),
+        name,
+        depth,
+        *name_loc,
+        value,
+        TOK2LOC(parser, operator)
+    );
 }
 
 /**
  * Returns true if the given bounds comprise `it`.
  */
-static inline bool
+static PRISM_INLINE bool
 pm_token_is_it(const uint8_t *start, const uint8_t *end) {
     return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
 }
@@ -5826,19 +5567,24 @@ pm_token_is_it(const uint8_t *start, const uint8_t *end) {
  * Returns true if the given bounds comprise a numbered parameter (i.e., they
  * are of the form /^_\d$/).
  */
-static inline bool
-pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
-    return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (pm_char_is_decimal_digit(start[1]));
+static PRISM_INLINE bool
+pm_token_is_numbered_parameter(const pm_parser_t *parser, uint32_t start, uint32_t length) {
+    return (
+        (length == 2) &&
+        (parser->start[start] == '_') &&
+        (parser->start[start + 1] != '0') &&
+        pm_char_is_decimal_digit(parser->start[start + 1])
+    );
 }
 
 /**
  * Ensure the given bounds do not comprise a numbered parameter. If they do, add
  * an appropriate error message to the parser.
  */
-static inline void
-pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
-    if (pm_token_is_numbered_parameter(start, end)) {
-        PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
+static PRISM_INLINE void
+pm_refute_numbered_parameter(pm_parser_t *parser, uint32_t start, uint32_t length) {
+    if (pm_token_is_numbered_parameter(parser, start, length)) {
+        PM_PARSER_ERR_FORMAT(parser, start, length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + start);
     }
 }
 
@@ -5848,20 +5594,16 @@ pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const ui
  */
 static pm_local_variable_target_node_t *
 pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
-    pm_refute_numbered_parameter(parser, location->start, location->end);
-    pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
-
-    *node = (pm_local_variable_target_node_t) {
-        {
-            .type = PM_LOCAL_VARIABLE_TARGET_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = *location
-        },
-        .name = name,
-        .depth = depth
-    };
-
-    return node;
+    pm_refute_numbered_parameter(parser, location->start, location->length);
+
+    return pm_local_variable_target_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        ((pm_location_t) { .start = location->start, .length = location->length }),
+        name,
+        depth
+    );
 }
 
 /**
@@ -5871,23 +5613,15 @@ static pm_match_predicate_node_t *
 pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
     pm_assert_value_expression(parser, value);
 
-    pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
-
-    *node = (pm_match_predicate_node_t) {
-        {
-            .type = PM_MATCH_PREDICATE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = value->location.start,
-                .end = pattern->location.end
-            }
-        },
-        .value = value,
-        .pattern = pattern,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
-    };
-
-    return node;
+    return pm_match_predicate_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(value, pattern),
+        value,
+        pattern,
+        TOK2LOC(parser, operator)
+    );
 }
 
 /**
@@ -5897,23 +5631,15 @@ static pm_match_required_node_t *
 pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
     pm_assert_value_expression(parser, value);
 
-    pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
-
-    *node = (pm_match_required_node_t) {
-        {
-            .type = PM_MATCH_REQUIRED_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = value->location.start,
-                .end = pattern->location.end
-            }
-        },
-        .value = value,
-        .pattern = pattern,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
-    };
-
-    return node;
+    return pm_match_required_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(value, pattern),
+        value,
+        pattern,
+        TOK2LOC(parser, operator)
+    );
 }
 
 /**
@@ -5921,19 +5647,14 @@ pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *
  */
 static pm_match_write_node_t *
 pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
-    pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
-
-    *node = (pm_match_write_node_t) {
-        {
-            .type = PM_MATCH_WRITE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = call->base.location
-        },
-        .call = call,
-        .targets = { 0 }
-    };
-
-    return node;
+    return pm_match_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODE(call),
+        call,
+        ((pm_node_list_t) { 0 })
+    );
 }
 
 /**
@@ -5941,26 +5662,18 @@ pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
  */
 static pm_module_node_t *
 pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
-    pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
-
-    *node = (pm_module_node_t) {
-        {
-            .type = PM_MODULE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = module_keyword->start,
-                .end = end_keyword->end
-            }
-        },
-        .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
-        .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
-        .constant_path = constant_path,
-        .body = body,
-        .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
-        .name = pm_parser_constant_id_token(parser, name)
-    };
-
-    return node;
+    return pm_module_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, module_keyword, end_keyword),
+        (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
+        TOK2LOC(parser, module_keyword),
+        constant_path,
+        body,
+        TOK2LOC(parser, end_keyword),
+        pm_parser_constant_id_token(parser, name)
+    );
 }
 
 /**
@@ -5968,22 +5681,17 @@ pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const
  */
 static pm_multi_target_node_t *
 pm_multi_target_node_create(pm_parser_t *parser) {
-    pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
-
-    *node = (pm_multi_target_node_t) {
-        {
-            .type = PM_MULTI_TARGET_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = { .start = NULL, .end = NULL }
-        },
-        .lefts = { 0 },
-        .rest = NULL,
-        .rights = { 0 },
-        .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
-    };
-
-    return node;
+    return pm_multi_target_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_UNSET,
+        ((pm_node_list_t) { 0 }),
+        NULL,
+        ((pm_node_list_t) { 0 }),
+        ((pm_location_t) { 0 }),
+        ((pm_location_t) { 0 })
+    );
 }
 
 /**
@@ -5996,27 +5704,27 @@ pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t
             node->rest = target;
         } else {
             pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
-            pm_node_list_append(&node->rights, target);
+            pm_node_list_append(parser->arena, &node->rights, target);
         }
     } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
         if (node->rest == NULL) {
             node->rest = target;
         } else {
-            PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
-            pm_node_list_append(&node->rights, target);
+            PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
+            pm_node_list_append(parser->arena, &node->rights, target);
         }
     } else if (node->rest == NULL) {
-        pm_node_list_append(&node->lefts, target);
+        pm_node_list_append(parser->arena, &node->lefts, target);
     } else {
-        pm_node_list_append(&node->rights, target);
+        pm_node_list_append(parser->arena, &node->rights, target);
     }
 
-    if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
-        node->base.location.start = target->location.start;
+    if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_START(node) > PM_NODE_START(target))) {
+        PM_NODE_START_SET_NODE(node, target);
     }
 
-    if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
-        node->base.location.end = target->location.end;
+    if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_END(node) < PM_NODE_END(target))) {
+        PM_NODE_LENGTH_SET_NODE(node, target);
     }
 }
 
@@ -6024,18 +5732,19 @@ pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t
  * Set the opening of a MultiTargetNode node.
  */
 static void
-pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
-    node->base.location.start = lparen->start;
-    node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
+pm_multi_target_node_opening_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *lparen) {
+    PM_NODE_START_SET_TOKEN(parser, node, lparen);
+    PM_NODE_LENGTH_SET_TOKEN(parser, node, lparen);
+    node->lparen_loc = TOK2LOC(parser, lparen);
 }
 
 /**
  * Set the closing of a MultiTargetNode node.
  */
 static void
-pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
-    node->base.location.end = rparen->end;
-    node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
+pm_multi_target_node_closing_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *rparen) {
+    PM_NODE_LENGTH_SET_TOKEN(parser, node, rparen);
+    node->rparen_loc = TOK2LOC(parser, rparen);
 }
 
 /**
@@ -6043,32 +5752,21 @@ pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t
  */
 static pm_multi_write_node_t *
 pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
-    pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
-
-    *node = (pm_multi_write_node_t) {
-        {
-            .type = PM_MULTI_WRITE_NODE,
-            .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = target->base.location.start,
-                .end = value->location.end
-            }
-        },
-        .lefts = target->lefts,
-        .rest = target->rest,
-        .rights = target->rights,
-        .lparen_loc = target->lparen_loc,
-        .rparen_loc = target->rparen_loc,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
-
-    // Explicitly do not call pm_node_destroy here because we want to keep
-    // around all of the information within the MultiWriteNode node.
-    xfree(target);
-
-    return node;
+    /* The target is no longer necessary because we have reused its children. It
+     * is arena-allocated so no explicit free is needed. */
+    return pm_multi_write_node_new(
+        parser->arena,
+        ++parser->node_id,
+        pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+        PM_LOCATION_INIT_NODES(target, value),
+        target->lefts,
+        target->rest,
+        target->rights,
+        target->lparen_loc,
+        target->rparen_loc,
+        TOK2LOC(parser, operator),
+        value
+    );
 }
 
 /**
@@ -6077,22 +5775,15 @@ pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target,
 static pm_next_node_t *
 pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
     assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
-    pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
-
-    *node = (pm_next_node_t) {
-        {
-            .type = PM_NEXT_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = keyword->start,
-                .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
-            }
-        },
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
-        .arguments = arguments
-    };
 
-    return node;
+    return pm_next_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
+        arguments,
+        TOK2LOC(parser, keyword)
+    );
 }
 
 /**
@@ -6101,16 +5792,31 @@ pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments
 static pm_nil_node_t *
 pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_KEYWORD_NIL);
-    pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
 
-    *node = (pm_nil_node_t) {{
-        .type = PM_NIL_NODE,
-        .flags = PM_NODE_FLAG_STATIC_LITERAL,
-        .node_id = PM_NODE_IDENTIFY(parser),
-        .location = PM_LOCATION_TOKEN_VALUE(token)
-    }};
+    return pm_nil_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_STATIC_LITERAL,
+        PM_LOCATION_INIT_TOKEN(parser, token)
+    );
+}
 
-    return node;
+/**
+ * Allocate and initialize a new NoBlockParameterNode node.
+ */
+static pm_no_block_parameter_node_t *
+pm_no_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
+    assert(operator->type == PM_TOKEN_AMPERSAND || operator->type == PM_TOKEN_UAMPERSAND);
+    assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
+
+    return pm_no_block_parameter_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, operator, keyword),
+        TOK2LOC(parser, operator),
+        TOK2LOC(parser, keyword)
+    );
 }
 
 /**
@@ -6120,41 +5826,29 @@ static pm_no_keywords_parameter_node_t *
 pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
     assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
     assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
-    pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
-
-    *node = (pm_no_keywords_parameter_node_t) {
-        {
-            .type = PM_NO_KEYWORDS_PARAMETER_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = operator->start,
-                .end = keyword->end
-            }
-        },
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
-    };
 
-    return node;
+    return pm_no_keywords_parameter_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, operator, keyword),
+        TOK2LOC(parser, operator),
+        TOK2LOC(parser, keyword)
+    );
 }
 
 /**
  * Allocate and initialize a new NumberedParametersNode node.
  */
 static pm_numbered_parameters_node_t *
-pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
-    pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
-
-    *node = (pm_numbered_parameters_node_t) {
-        {
-            .type = PM_NUMBERED_PARAMETERS_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = *location
-        },
-        .maximum = maximum
-    };
-
-    return node;
+pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing, uint8_t maximum) {
+    return pm_numbered_parameters_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+        maximum
+    );
 }
 
 /**
@@ -6190,14 +5884,14 @@ pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *to
     unsigned long value = strtoul(digits, &endptr, 10);
 
     if ((digits == endptr) || (*endptr != '\0')) {
-        pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
+        pm_parser_err(parser, U32(start - parser->start), U32(length), PM_ERR_INVALID_NUMBER_DECIMAL);
         value = 0;
     }
 
-    xfree(digits);
+    xfree_sized(digits, sizeof(char) * (length + 1));
 
     if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
-        PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
+        PM_PARSER_WARN_FORMAT(parser, U32(start - parser->start), U32(length), PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
         value = 0;
     }
 
@@ -6212,18 +5906,14 @@ pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *to
 static pm_numbered_reference_read_node_t *
 pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
     assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
-    pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
 
-    *node = (pm_numbered_reference_read_node_t) {
-        {
-            .type = PM_NUMBERED_REFERENCE_READ_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(name),
-        },
-        .number = pm_numbered_reference_read_node_number(parser, name)
-    };
-
-    return node;
+    return pm_numbered_reference_read_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, name),
+        pm_numbered_reference_read_node_number(parser, name)
+    );
 }
 
 /**
@@ -6231,24 +5921,16 @@ pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *na
  */
 static pm_optional_parameter_node_t *
 pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
-    pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
-
-    *node = (pm_optional_parameter_node_t) {
-        {
-            .type = PM_OPTIONAL_PARAMETER_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = name->start,
-                .end = value->location.end
-            }
-        },
-        .name = pm_parser_constant_id_token(parser, name),
-        .name_loc = PM_LOCATION_TOKEN_VALUE(name),
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .value = value
-    };
-
-    return node;
+    return pm_optional_parameter_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN_NODE(parser, name, value),
+        pm_parser_constant_id_token(parser, name),
+        TOK2LOC(parser, name),
+        TOK2LOC(parser, operator),
+        value
+    );
 }
 
 /**
@@ -6258,23 +5940,15 @@ static pm_or_node_t *
 pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
     pm_assert_value_expression(parser, left);
 
-    pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
-
-    *node = (pm_or_node_t) {
-        {
-            .type = PM_OR_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = left->location.start,
-                .end = right->location.end
-            }
-        },
-        .left = left,
-        .right = right,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
-    };
-
-    return node;
+    return pm_or_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(left, right),
+        left,
+        right,
+        TOK2LOC(parser, operator)
+    );
 }
 
 /**
@@ -6282,24 +5956,19 @@ pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operat
  */
 static pm_parameters_node_t *
 pm_parameters_node_create(pm_parser_t *parser) {
-    pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
-
-    *node = (pm_parameters_node_t) {
-        {
-            .type = PM_PARAMETERS_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(&parser->current)
-        },
-        .rest = NULL,
-        .keyword_rest = NULL,
-        .block = NULL,
-        .requireds = { 0 },
-        .optionals = { 0 },
-        .posts = { 0 },
-        .keywords = { 0 }
-    };
-
-    return node;
+    return pm_parameters_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_UNSET,
+        ((pm_node_list_t) { 0 }),
+        ((pm_node_list_t) { 0 }),
+        NULL,
+        ((pm_node_list_t) { 0 }),
+        ((pm_node_list_t) { 0 }),
+        NULL,
+        NULL
+    );
 }
 
 /**
@@ -6307,16 +5976,12 @@ pm_parameters_node_create(pm_parser_t *parser) {
  */
 static void
 pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
-    if (params->base.location.start == NULL) {
-        params->base.location.start = param->location.start;
-    } else {
-        params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
+    if ((params->base.location.length == 0) || PM_NODE_START(params) > PM_NODE_START(param)) {
+        PM_NODE_START_SET_NODE(params, param);
     }
 
-    if (params->base.location.end == NULL) {
-        params->base.location.end = param->location.end;
-    } else {
-        params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
+    if ((params->base.location.length == 0) || (PM_NODE_END(params) < PM_NODE_END(param))) {
+        PM_NODE_LENGTH_SET_NODE(params, param);
     }
 }
 
@@ -6324,27 +5989,27 @@ pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param)
  * Append a required parameter to a ParametersNode node.
  */
 static void
-pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
+pm_parameters_node_requireds_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
     pm_parameters_node_location_set(params, param);
-    pm_node_list_append(&params->requireds, param);
+    pm_node_list_append(arena, &params->requireds, param);
 }
 
 /**
  * Append an optional parameter to a ParametersNode node.
  */
 static void
-pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
-    pm_parameters_node_location_set(params, (pm_node_t *) param);
-    pm_node_list_append(&params->optionals, (pm_node_t *) param);
+pm_parameters_node_optionals_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
+    pm_parameters_node_location_set(params, UP(param));
+    pm_node_list_append(arena, &params->optionals, UP(param));
 }
 
 /**
  * Append a post optional arguments parameter to a ParametersNode node.
  */
 static void
-pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
+pm_parameters_node_posts_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
     pm_parameters_node_location_set(params, param);
-    pm_node_list_append(&params->posts, param);
+    pm_node_list_append(arena, &params->posts, param);
 }
 
 /**
@@ -6360,9 +6025,9 @@ pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
  * Append a keyword parameter to a ParametersNode node.
  */
 static void
-pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
+pm_parameters_node_keywords_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
     pm_parameters_node_location_set(params, param);
-    pm_node_list_append(&params->keywords, param);
+    pm_node_list_append(arena, &params->keywords, param);
 }
 
 /**
@@ -6379,9 +6044,9 @@ pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *par
  * Set the block parameter on a ParametersNode node.
  */
 static void
-pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
+pm_parameters_node_block_set(pm_parameters_node_t *params, pm_node_t *param) {
     assert(params->block == NULL);
-    pm_parameters_node_location_set(params, (pm_node_t *) param);
+    pm_parameters_node_location_set(params, param);
     params->block = param;
 }
 
@@ -6390,22 +6055,14 @@ pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_no
  */
 static pm_program_node_t *
 pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
-    pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
-
-    *node = (pm_program_node_t) {
-        {
-            .type = PM_PROGRAM_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = statements == NULL ? parser->start : statements->base.location.start,
-                .end = statements == NULL ? parser->end : statements->base.location.end
-            }
-        },
-        .locals = *locals,
-        .statements = statements
-    };
-
-    return node;
+    return pm_program_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODE(statements),
+        *locals,
+        statements
+    );
 }
 
 /**
@@ -6413,24 +6070,15 @@ pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_st
  */
 static pm_parentheses_node_t *
 pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
-    pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
-
-    *node = (pm_parentheses_node_t) {
-        {
-            .type = PM_PARENTHESES_NODE,
-            .flags = flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = opening->start,
-                .end = closing->end
-            }
-        },
-        .body = body,
-        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
-        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
-    };
-
-    return node;
+    return pm_parentheses_node_new(
+        parser->arena,
+        ++parser->node_id,
+        flags,
+        PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+        body,
+        TOK2LOC(parser, opening),
+        TOK2LOC(parser, closing)
+    );
 }
 
 /**
@@ -6438,24 +6086,16 @@ pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_no
  */
 static pm_pinned_expression_node_t *
 pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
-    pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
-
-    *node = (pm_pinned_expression_node_t) {
-        {
-            .type = PM_PINNED_EXPRESSION_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = operator->start,
-                .end = rparen->end
-            }
-        },
-        .expression = expression,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
-        .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
-    };
-
-    return node;
+    return pm_pinned_expression_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, operator, rparen),
+        expression,
+        TOK2LOC(parser, operator),
+        TOK2LOC(parser, lparen),
+        TOK2LOC(parser, rparen)
+    );
 }
 
 /**
@@ -6463,22 +6103,14 @@ pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, con
  */
 static pm_pinned_variable_node_t *
 pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
-    pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
-
-    *node = (pm_pinned_variable_node_t) {
-        {
-            .type = PM_PINNED_VARIABLE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = operator->start,
-                .end = variable->location.end
-            }
-        },
-        .variable = variable,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
-    };
-
-    return node;
+    return pm_pinned_variable_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable),
+        variable,
+        TOK2LOC(parser, operator)
+    );
 }
 
 /**
@@ -6486,24 +6118,16 @@ pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator,
  */
 static pm_post_execution_node_t *
 pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
-    pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
-
-    *node = (pm_post_execution_node_t) {
-        {
-            .type = PM_POST_EXECUTION_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = keyword->start,
-                .end = closing->end
-            }
-        },
-        .statements = statements,
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
-        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
-        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
-    };
-
-    return node;
+    return pm_post_execution_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
+        statements,
+        TOK2LOC(parser, keyword),
+        TOK2LOC(parser, opening),
+        TOK2LOC(parser, closing)
+    );
 }
 
 /**
@@ -6511,24 +6135,16 @@ pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, co
  */
 static pm_pre_execution_node_t *
 pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
-    pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
-
-    *node = (pm_pre_execution_node_t) {
-        {
-            .type = PM_PRE_EXECUTION_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = keyword->start,
-                .end = closing->end
-            }
-        },
-        .statements = statements,
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
-        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
-        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
-    };
-
-    return node;
+    return pm_pre_execution_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
+        statements,
+        TOK2LOC(parser, keyword),
+        TOK2LOC(parser, opening),
+        TOK2LOC(parser, closing)
+    );
 }
 
 /**
@@ -6538,8 +6154,6 @@ static pm_range_node_t *
 pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
     pm_assert_value_expression(parser, left);
     pm_assert_value_expression(parser, right);
-
-    pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
     pm_node_flags_t flags = 0;
 
     // Indicate that this node is an exclusive range if the operator is `...`.
@@ -6557,22 +6171,18 @@ pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *ope
         flags |= PM_NODE_FLAG_STATIC_LITERAL;
     }
 
-    *node = (pm_range_node_t) {
-        {
-            .type = PM_RANGE_NODE,
-            .flags = flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = (left == NULL ? operator->start : left->location.start),
-                .end = (right == NULL ? operator->end : right->location.end)
-            }
-        },
-        .left = left,
-        .right = right,
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
-    };
+    uint32_t start = left == NULL ? PM_TOKEN_START(parser, operator) : PM_NODE_START(left);
+    uint32_t end = right == NULL ? PM_TOKEN_END(parser, operator) : PM_NODE_END(right);
 
-    return node;
+    return pm_range_node_new(
+        parser->arena,
+        ++parser->node_id,
+        flags,
+        ((pm_location_t) { .start = start, .length = U32(end - start) }),
+        left,
+        right,
+        TOK2LOC(parser, operator)
+    );
 }
 
 /**
@@ -6581,15 +6191,13 @@ pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *ope
 static pm_redo_node_t *
 pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_KEYWORD_REDO);
-    pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
 
-    *node = (pm_redo_node_t) {{
-        .type = PM_REDO_NODE,
-        .node_id = PM_NODE_IDENTIFY(parser),
-        .location = PM_LOCATION_TOKEN_VALUE(token)
-    }};
-
-    return node;
+    return pm_redo_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, token)
+    );
 }
 
 /**
@@ -6598,31 +6206,22 @@ pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
  */
 static pm_regular_expression_node_t *
 pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
-    pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
-
-    *node = (pm_regular_expression_node_t) {
-        {
-            .type = PM_REGULAR_EXPRESSION_NODE,
-            .flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = MIN(opening->start, closing->start),
-                .end = MAX(opening->end, closing->end)
-            }
-        },
-        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
-        .content_loc = PM_LOCATION_TOKEN_VALUE(content),
-        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
-        .unescaped = *unescaped
-    };
-
-    return node;
+    return pm_regular_expression_node_new(
+        parser->arena,
+        ++parser->node_id,
+        pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
+        PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+        TOK2LOC(parser, opening),
+        TOK2LOC(parser, content),
+        TOK2LOC(parser, closing),
+        *unescaped
+    );
 }
 
 /**
  * Allocate a new initialize a new RegularExpressionNode node.
  */
-static inline pm_regular_expression_node_t *
+static PRISM_INLINE pm_regular_expression_node_t *
 pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
     return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
 }
@@ -6632,18 +6231,13 @@ pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening
  */
 static pm_required_parameter_node_t *
 pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
-    pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
-
-    *node = (pm_required_parameter_node_t) {
-        {
-            .type = PM_REQUIRED_PARAMETER_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(token)
-        },
-        .name = pm_parser_constant_id_token(parser, token)
-    };
-
-    return node;
+    return pm_required_parameter_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, token),
+        pm_parser_constant_id_token(parser, token)
+    );
 }
 
 /**
@@ -6651,23 +6245,15 @@ pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token)
  */
 static pm_rescue_modifier_node_t *
 pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
-    pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
-
-    *node = (pm_rescue_modifier_node_t) {
-        {
-            .type = PM_RESCUE_MODIFIER_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = expression->location.start,
-                .end = rescue_expression->location.end
-            }
-        },
-        .expression = expression,
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
-        .rescue_expression = rescue_expression
-    };
-
-    return node;
+    return pm_rescue_modifier_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_NODES(expression, rescue_expression),
+        expression,
+        TOK2LOC(parser, keyword),
+        rescue_expression
+    );
 }
 
 /**
@@ -6675,29 +6261,24 @@ pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const
  */
 static pm_rescue_node_t *
 pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
-    pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
-
-    *node = (pm_rescue_node_t) {
-        {
-            .type = PM_RESCUE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(keyword)
-        },
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
-        .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .reference = NULL,
-        .statements = NULL,
-        .subsequent = NULL,
-        .exceptions = { 0 }
-    };
-
-    return node;
+    return pm_rescue_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, keyword),
+        TOK2LOC(parser, keyword),
+        ((pm_node_list_t) { 0 }),
+        ((pm_location_t) { 0 }),
+        NULL,
+        ((pm_location_t) { 0 }),
+        NULL,
+        NULL
+    );
 }
 
-static inline void
-pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
-    node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+static PRISM_INLINE void
+pm_rescue_node_operator_set(const pm_parser_t *parser, pm_rescue_node_t *node, const pm_token_t *operator) {
+    node->operator_loc = TOK2LOC(parser, operator);
 }
 
 /**
@@ -6706,7 +6287,7 @@ pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator)
 static void
 pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
     node->reference = reference;
-    node->base.location.end = reference->location.end;
+    PM_NODE_LENGTH_SET_NODE(node, reference);
 }
 
 /**
@@ -6716,7 +6297,7 @@ static void
 pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
     node->statements = statements;
     if (pm_statements_node_body_length(statements) > 0) {
-        node->base.location.end = statements->base.location.end;
+        PM_NODE_LENGTH_SET_NODE(node, statements);
     }
 }
 
@@ -6726,16 +6307,16 @@ pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *stat
 static void
 pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
     node->subsequent = subsequent;
-    node->base.location.end = subsequent->base.location.end;
+    PM_NODE_LENGTH_SET_NODE(node, subsequent);
 }
 
 /**
  * Append an exception node to a rescue node, and update the location.
  */
 static void
-pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
-    pm_node_list_append(&node->exceptions, exception);
-    node->base.location.end = exception->location.end;
+pm_rescue_node_exceptions_append(pm_arena_t *arena, pm_rescue_node_t *node, pm_node_t *exception) {
+    pm_node_list_append(arena, &node->exceptions, exception);
+    PM_NODE_LENGTH_SET_NODE(node, exception);
 }
 
 /**
@@ -6743,23 +6324,15 @@ pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
  */
 static pm_rest_parameter_node_t *
 pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
-    pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
-
-    *node = (pm_rest_parameter_node_t) {
-        {
-            .type = PM_REST_PARAMETER_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = operator->start,
-                .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
-            }
-        },
-        .name = pm_parser_optional_constant_id_token(parser, name),
-        .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
-    };
-
-    return node;
+    return pm_rest_parameter_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
+        name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
+        NTOK2LOC(parser, name),
+        TOK2LOC(parser, operator)
+    );
 }
 
 /**
@@ -6768,15 +6341,13 @@ pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, c
 static pm_retry_node_t *
 pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_KEYWORD_RETRY);
-    pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
 
-    *node = (pm_retry_node_t) {{
-        .type = PM_RETRY_NODE,
-        .node_id = PM_NODE_IDENTIFY(parser),
-        .location = PM_LOCATION_TOKEN_VALUE(token)
-    }};
-
-    return node;
+    return pm_retry_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, token)
+    );
 }
 
 /**
@@ -6784,22 +6355,14 @@ pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
  */
 static pm_return_node_t *
 pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
-    pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
-
-    *node = (pm_return_node_t) {
-        {
-            .type = PM_RETURN_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = keyword->start,
-                .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
-            }
-        },
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
-        .arguments = arguments
-    };
-
-    return node;
+    return pm_return_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
+        TOK2LOC(parser, keyword),
+        arguments
+    );
 }
 
 /**
@@ -6808,15 +6371,13 @@ pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argumen
 static pm_self_node_t *
 pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_KEYWORD_SELF);
-    pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
 
-    *node = (pm_self_node_t) {{
-        .type = PM_SELF_NODE,
-        .node_id = PM_NODE_IDENTIFY(parser),
-        .location = PM_LOCATION_TOKEN_VALUE(token)
-    }};
-
-    return node;
+    return pm_self_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, token)
+    );
 }
 
 /**
@@ -6824,19 +6385,13 @@ pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
  */
 static pm_shareable_constant_node_t *
 pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
-    pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
-
-    *node = (pm_shareable_constant_node_t) {
-        {
-            .type = PM_SHAREABLE_CONSTANT_NODE,
-            .flags = (pm_node_flags_t) value,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_NODE_VALUE(write)
-        },
-        .write = write
-    };
-
-    return node;
+    return pm_shareable_constant_node_new(
+        parser->arena,
+        ++parser->node_id,
+        (pm_node_flags_t) value,
+        PM_LOCATION_INIT_NODE(write),
+        write
+    );
 }
 
 /**
@@ -6844,26 +6399,18 @@ pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shar
  */
 static pm_singleton_class_node_t *
 pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
-    pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
-
-    *node = (pm_singleton_class_node_t) {
-        {
-            .type = PM_SINGLETON_CLASS_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = class_keyword->start,
-                .end = end_keyword->end
-            }
-        },
-        .locals = *locals,
-        .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .expression = expression,
-        .body = body,
-        .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
-    };
-
-    return node;
+    return pm_singleton_class_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword),
+        *locals,
+        TOK2LOC(parser, class_keyword),
+        TOK2LOC(parser, operator),
+        expression,
+        body,
+        TOK2LOC(parser, end_keyword)
+    );
 }
 
 /**
@@ -6872,16 +6419,13 @@ pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *local
 static pm_source_encoding_node_t *
 pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
-    pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
-
-    *node = (pm_source_encoding_node_t) {{
-        .type = PM_SOURCE_ENCODING_NODE,
-        .flags = PM_NODE_FLAG_STATIC_LITERAL,
-        .node_id = PM_NODE_IDENTIFY(parser),
-        .location = PM_LOCATION_TOKEN_VALUE(token)
-    }};
 
-    return node;
+    return pm_source_encoding_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_STATIC_LITERAL,
+        PM_LOCATION_INIT_TOKEN(parser, token)
+    );
 }
 
 /**
@@ -6889,7 +6433,6 @@ pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
  */
 static pm_source_file_node_t*
 pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
-    pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
     assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
 
     pm_node_flags_t flags = 0;
@@ -6903,17 +6446,13 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword)
             break;
     }
 
-    *node = (pm_source_file_node_t) {
-        {
-            .type = PM_SOURCE_FILE_NODE,
-            .flags = flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
-        },
-        .filepath = parser->filepath
-    };
-
-    return node;
+    return pm_source_file_node_new(
+        parser->arena,
+        ++parser->node_id,
+        flags,
+        PM_LOCATION_INIT_TOKEN(parser, file_keyword),
+        parser->filepath
+    );
 }
 
 /**
@@ -6922,16 +6461,13 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword)
 static pm_source_line_node_t *
 pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_KEYWORD___LINE__);
-    pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
 
-    *node = (pm_source_line_node_t) {{
-        .type = PM_SOURCE_LINE_NODE,
-        .flags = PM_NODE_FLAG_STATIC_LITERAL,
-        .node_id = PM_NODE_IDENTIFY(parser),
-        .location = PM_LOCATION_TOKEN_VALUE(token)
-    }};
-
-    return node;
+    return pm_source_line_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_STATIC_LITERAL,
+        PM_LOCATION_INIT_TOKEN(parser, token)
+    );
 }
 
 /**
@@ -6939,22 +6475,14 @@ pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
  */
 static pm_splat_node_t *
 pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
-    pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
-
-    *node = (pm_splat_node_t) {
-        {
-            .type = PM_SPLAT_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = operator->start,
-                .end = (expression == NULL ? operator->end : expression->location.end)
-            }
-        },
-        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
-        .expression = expression
-    };
-
-    return node;
+    return pm_splat_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression),
+        TOK2LOC(parser, operator),
+        expression
+    );
 }
 
 /**
@@ -6962,18 +6490,13 @@ pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t
  */
 static pm_statements_node_t *
 pm_statements_node_create(pm_parser_t *parser) {
-    pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
-
-    *node = (pm_statements_node_t) {
-        {
-            .type = PM_STATEMENTS_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_NULL_VALUE(parser)
-        },
-        .body = { 0 }
-    };
-
-    return node;
+    return pm_statements_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_UNSET,
+        ((pm_node_list_t) { 0 })
+    );
 }
 
 /**
@@ -6985,25 +6508,17 @@ pm_statements_node_body_length(pm_statements_node_t *node) {
 }
 
 /**
- * Set the location of the given StatementsNode.
- */
-static void
-pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
-    node->base.location = (pm_location_t) { .start = start, .end = end };
-}
-
-/**
  * Update the location of the statements node based on the statement that is
  * being added to the list.
  */
-static inline void
+static PRISM_INLINE void
 pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
-    if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
-        node->base.location.start = statement->location.start;
+    if (pm_statements_node_body_length(node) == 0 || PM_NODE_START(statement) < PM_NODE_START(node)) {
+        PM_NODE_START_SET_NODE(node, statement);
     }
 
-    if (statement->location.end > node->base.location.end) {
-        node->base.location.end = statement->location.end;
+    if (PM_NODE_END(statement) > PM_NODE_END(node)) {
+        PM_NODE_LENGTH_SET_NODE(node, statement);
     }
 }
 
@@ -7030,7 +6545,7 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node,
         }
     }
 
-    pm_node_list_append(&node->body, statement);
+    pm_node_list_append(parser->arena, &node->body, statement);
     if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
 }
 
@@ -7038,18 +6553,17 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node,
  * Prepend a new node to the given StatementsNode node's body.
  */
 static void
-pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
+pm_statements_node_body_prepend(pm_arena_t *arena, pm_statements_node_t *node, pm_node_t *statement) {
     pm_statements_node_body_update(node, statement);
-    pm_node_list_prepend(&node->body, statement);
+    pm_node_list_prepend(arena, &node->body, statement);
     pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
 }
 
 /**
  * Allocate a new StringNode node with the current string on the parser.
  */
-static inline pm_string_node_t *
+static PRISM_INLINE pm_string_node_t *
 pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
-    pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
     pm_node_flags_t flags = 0;
 
     switch (parser->frozen_string_literal) {
@@ -7061,23 +6575,19 @@ pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening,
             break;
     }
 
-    *node = (pm_string_node_t) {
-        {
-            .type = PM_STRING_NODE,
-            .flags = flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start),
-                .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end)
-            }
-        },
-        .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
-        .content_loc = PM_LOCATION_TOKEN_VALUE(content),
-        .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
-        .unescaped = *string
-    };
+    uint32_t start = PM_TOKEN_START(parser, opening == NULL ? content : opening);
+    uint32_t end = PM_TOKEN_END(parser, closing == NULL ? content : closing);
 
-    return node;
+    return pm_string_node_new(
+        parser->arena,
+        ++parser->node_id,
+        flags,
+        ((pm_location_t) { .start = start, .length = U32(end - start) }),
+        NTOK2LOC(parser, opening),
+        TOK2LOC(parser, content),
+        NTOK2LOC(parser, closing),
+        *string
+    );
 }
 
 /**
@@ -7105,30 +6615,21 @@ pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *open
 static pm_super_node_t *
 pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
     assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
-    pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
-
-    const uint8_t *end = pm_arguments_end(arguments);
-    if (end == NULL) {
-        assert(false && "unreachable");
-    }
 
-    *node = (pm_super_node_t) {
-        {
-            .type = PM_SUPER_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = keyword->start,
-                .end = end,
-            }
-        },
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
-        .lparen_loc = arguments->opening_loc,
-        .arguments = arguments->arguments,
-        .rparen_loc = arguments->closing_loc,
-        .block = arguments->block
-    };
-
-    return node;
+    const pm_location_t *end = pm_arguments_end(arguments);
+    assert(end != NULL && "unreachable");
+
+    return pm_super_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        ((pm_location_t) { .start = PM_TOKEN_START(parser, keyword), .length = PM_LOCATION_END(end) - PM_TOKEN_START(parser, keyword) }),
+        TOK2LOC(parser, keyword),
+        arguments->opening_loc,
+        arguments->arguments,
+        arguments->closing_loc,
+        arguments->block
+    );
 }
 
 /**
@@ -7156,7 +6657,7 @@ parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *locat
         size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
 
         if (width == 0) {
-            pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
+            pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
             break;
         }
 
@@ -7176,7 +6677,7 @@ parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *loca
         size_t width = encoding->char_width(cursor, end - cursor);
 
         if (width == 0) {
-            pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
+            pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
             break;
         }
 
@@ -7193,7 +6694,7 @@ parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *loca
  * If the validate flag is set, then it will check the contents of the symbol
  * to ensure that all characters are valid in the encoding.
  */
-static inline pm_node_flags_t
+static PRISM_INLINE pm_node_flags_t
 parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
     if (parser->explicit_encoding != NULL) {
         // A Symbol may optionally have its encoding explicitly set. This will
@@ -7218,160 +6719,31 @@ parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_
     return 0;
 }
 
-static pm_node_flags_t
-parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
-    assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
-            (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
-            (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
-            (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
-
-    // There's special validation logic used if a string does not contain any character escape sequences.
-    if (parser->explicit_encoding == NULL) {
-        // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
-        // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
-        // the US-ASCII encoding.
-        if (ascii_only) {
-            return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
-        }
-
-        if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
-            if (!ascii_only) {
-                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
-            }
-        } else if (parser->encoding != modifier_encoding) {
-            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
-
-            if (modifier == 'n' && !ascii_only) {
-                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
-            }
-        }
-
-        return flags;
-    }
-
-    // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
-    bool mixed_encoding = false;
-
-    if (mixed_encoding) {
-        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
-    } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
-        // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
-        bool valid_string_in_modifier_encoding = true;
-
-        if (!valid_string_in_modifier_encoding) {
-            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
-        }
-    } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
-        // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
-        if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
-            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
-        }
-    }
-
-    // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
-    return flags;
-}
-
-/**
- * Ruby "downgrades" the encoding of Regexps to US-ASCII if the associated encoding is ASCII-compatible and
- * the unescaped representation of a Regexp source consists only of US-ASCII code points. This is true even
- * when the Regexp is explicitly given an ASCII-8BIT encoding via the (/n) modifier. Otherwise, the encoding
- * may be explicitly set with an escape sequence.
- */
-static pm_node_flags_t
-parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
-    // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
-    bool valid_unicode_range = true;
-    if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
-        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
-        return flags;
-    }
-
-    // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
-    // to multi-byte characters are allowed.
-    if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
-        // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
-        // following error message appearing twice. We do the same for compatibility.
-        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
-    }
-
-    /**
-     * Start checking modifier flags. We need to process these before considering any explicit encodings that may have
-     * been set by character literals. The order in which the encoding modifiers is checked does not matter. In the
-     * event that both an encoding modifier and an explicit encoding would result in the same encoding we do not set
-     * the corresponding "forced_<encoding>" flag. Instead, the caller should check the encoding modifier flag and
-     * determine the encoding that way.
-     */
-
-    if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
-        return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
-    }
-
-    if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
-        return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
-    }
-
-    if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
-        return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
-    }
-
-    if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
-        return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
-    }
-
-    // At this point no encoding modifiers will be present on the regular expression as they would have already
-    // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
-    // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
-    if (ascii_only) {
-        return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
-    }
-
-    // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
-    // or by specifying a modifier.
-    //
-    // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
-    if (parser->explicit_encoding != NULL) {
-        if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
-            return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
-        } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
-            return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
-        }
-    }
-
-    return 0;
-}
-
 /**
  * Allocate and initialize a new SymbolNode node with the given unescaped
  * string.
  */
 static pm_symbol_node_t *
 pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
-    pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
-
-    *node = (pm_symbol_node_t) {
-        {
-            .type = PM_SYMBOL_NODE,
-            .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
-                .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
-            }
-        },
-        .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
-        .value_loc = PM_LOCATION_TOKEN_VALUE(value),
-        .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
-        .unescaped = *unescaped
-    };
-
-    return node;
+    uint32_t start = opening == NULL ? PM_TOKEN_START(parser, value) : PM_TOKEN_START(parser, opening);
+    uint32_t end = closing == NULL ? PM_TOKEN_END(parser, value) : PM_TOKEN_END(parser, closing);
+
+    return pm_symbol_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_STATIC_LITERAL | flags,
+        ((pm_location_t) { .start = start, .length = U32(end - start) }),
+        NTOK2LOC(parser, opening),
+        NTOK2LOC(parser, value),
+        NTOK2LOC(parser, closing),
+        *unescaped
+    );
 }
 
 /**
  * Allocate and initialize a new SymbolNode node.
  */
-static inline pm_symbol_node_t *
+static PRISM_INLINE pm_symbol_node_t *
 pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
     return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
 }
@@ -7391,35 +6763,15 @@ pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *open
  */
 static pm_symbol_node_t *
 pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
-    pm_symbol_node_t *node;
-
-    switch (token->type) {
-        case PM_TOKEN_LABEL: {
-            pm_token_t opening = not_provided(parser);
-            pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
+    assert(token->type == PM_TOKEN_LABEL);
 
-            pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
-            node = pm_symbol_node_create(parser, &opening, &label, &closing);
+    pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
+    pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
+    pm_symbol_node_t *node = pm_symbol_node_create(parser, NULL, &label, &closing);
 
-            assert((label.end - label.start) >= 0);
-            pm_string_shared_init(&node->unescaped, label.start, label.end);
-            pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
-
-            break;
-        }
-        case PM_TOKEN_MISSING: {
-            pm_token_t opening = not_provided(parser);
-            pm_token_t closing = not_provided(parser);
-
-            pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
-            node = pm_symbol_node_create(parser, &opening, &label, &closing);
-            break;
-        }
-        default:
-            assert(false && "unreachable");
-            node = NULL;
-            break;
-    }
+    assert((label.end - label.start) >= 0);
+    pm_string_shared_init(&node->unescaped, label.start, label.end);
+    pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false));
 
     return node;
 }
@@ -7429,18 +6781,16 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
  */
 static pm_symbol_node_t *
 pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
-    pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
-
-    *node = (pm_symbol_node_t) {
-        {
-            .type = PM_SYMBOL_NODE,
-            .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_NULL_VALUE(parser)
-        },
-        .value_loc = PM_LOCATION_NULL_VALUE(parser),
-        .unescaped = { 0 }
-    };
+    pm_symbol_node_t *node = pm_symbol_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
+        PM_LOCATION_INIT_UNSET,
+        ((pm_location_t) { 0 }),
+        ((pm_location_t) { 0 }),
+        ((pm_location_t) { 0 }),
+        ((pm_string_t) { 0 })
+    );
 
     pm_string_constant_init(&node->unescaped, content, strlen(content));
     return node;
@@ -7450,21 +6800,29 @@ pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
  * Check if the given node is a label in a hash.
  */
 static bool
-pm_symbol_node_label_p(pm_node_t *node) {
-    const uint8_t *end = NULL;
+pm_symbol_node_label_p(const pm_parser_t *parser, const pm_node_t *node) {
+    const pm_location_t *location = NULL;
 
     switch (PM_NODE_TYPE(node)) {
-        case PM_SYMBOL_NODE:
-            end = ((pm_symbol_node_t *) node)->closing_loc.end;
+        case PM_SYMBOL_NODE: {
+            const pm_symbol_node_t *cast = (pm_symbol_node_t *) node;
+            if (cast->closing_loc.length > 0) {
+                location = &cast->closing_loc;
+            }
             break;
-        case PM_INTERPOLATED_SYMBOL_NODE:
-            end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
+        }
+        case PM_INTERPOLATED_SYMBOL_NODE: {
+            const pm_interpolated_symbol_node_t *cast = (pm_interpolated_symbol_node_t *) node;
+            if (cast->closing_loc.length > 0) {
+                location = &cast->closing_loc;
+            }
             break;
+        }
         default:
             return false;
     }
 
-    return (end != NULL) && (end[-1] == ':');
+    return (location != NULL) && (parser->start[PM_LOCATION_END(location) - 1] == ':');
 }
 
 /**
@@ -7472,32 +6830,26 @@ pm_symbol_node_label_p(pm_node_t *node) {
  */
 static pm_symbol_node_t *
 pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
-    pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
+    pm_symbol_node_t *new_node = pm_symbol_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_STATIC_LITERAL,
+        PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+        TOK2LOC(parser, opening),
+        node->content_loc,
+        TOK2LOC(parser, closing),
+        node->unescaped
+    );
 
-    *new_node = (pm_symbol_node_t) {
-        {
-            .type = PM_SYMBOL_NODE,
-            .flags = PM_NODE_FLAG_STATIC_LITERAL,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = opening->start,
-                .end = closing->end
-            }
-        },
-        .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
-        .value_loc = node->content_loc,
-        .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
-        .unescaped = node->unescaped
+    pm_token_t content = {
+        .type = PM_TOKEN_IDENTIFIER,
+        .start = parser->start + node->content_loc.start,
+        .end = parser->start + node->content_loc.start + node->content_loc.length
     };
 
-    pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
-    pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
-
-    // We are explicitly _not_ using pm_node_destroy here because we don't want
-    // to trash the unescaped string. We could instead copy the string if we
-    // know that it is owned, but we're taking the fast path for now.
-    xfree(node);
+    pm_node_flag_set(UP(new_node), parse_symbol_encoding(parser, &content, &node->unescaped, true));
 
+    /* The old string node is arena-allocated, so it is not freed here; new_node now carries its unescaped string. */
     return new_node;
 }
 
@@ -7506,7 +6858,6 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
  */
 static pm_string_node_t *
 pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
-    pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
     pm_node_flags_t flags = 0;
 
     switch (parser->frozen_string_literal) {
@@ -7518,24 +6869,18 @@ pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
             break;
     }
 
-    *new_node = (pm_string_node_t) {
-        {
-            .type = PM_STRING_NODE,
-            .flags = flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = node->base.location
-        },
-        .opening_loc = node->opening_loc,
-        .content_loc = node->value_loc,
-        .closing_loc = node->closing_loc,
-        .unescaped = node->unescaped
-    };
-
-    // We are explicitly _not_ using pm_node_destroy here because we don't want
-    // to trash the unescaped string. We could instead copy the string if we
-    // know that it is owned, but we're taking the fast path for now.
-    xfree(node);
+    pm_string_node_t *new_node = pm_string_node_new(
+        parser->arena,
+        ++parser->node_id,
+        flags,
+        PM_LOCATION_INIT_NODE(node),
+        node->opening_loc,
+        node->value_loc,
+        node->closing_loc,
+        node->unescaped
+    );
 
+    /* The old symbol node is arena-allocated, so it is not freed here; new_node now carries its unescaped string. */
     return new_node;
 }
 
@@ -7545,16 +6890,13 @@ pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
 static pm_true_node_t *
 pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_KEYWORD_TRUE);
-    pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
-
-    *node = (pm_true_node_t) {{
-        .type = PM_TRUE_NODE,
-        .flags = PM_NODE_FLAG_STATIC_LITERAL,
-        .node_id = PM_NODE_IDENTIFY(parser),
-        .location = PM_LOCATION_TOKEN_VALUE(token)
-    }};
 
-    return node;
+    return pm_true_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_STATIC_LITERAL,
+        PM_LOCATION_INIT_TOKEN(parser, token)
+    );
 }
 
 /**
@@ -7562,16 +6904,12 @@ pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
  */
 static pm_true_node_t *
 pm_true_node_synthesized_create(pm_parser_t *parser) {
-    pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
-
-    *node = (pm_true_node_t) {{
-        .type = PM_TRUE_NODE,
-        .flags = PM_NODE_FLAG_STATIC_LITERAL,
-        .node_id = PM_NODE_IDENTIFY(parser),
-        .location = { .start = parser->start, .end = parser->end }
-    }};
-
-    return node;
+    return pm_true_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_STATIC_LITERAL,
+        PM_LOCATION_INIT_UNSET
+    );
 }
 
 /**
@@ -7580,28 +6918,24 @@ pm_true_node_synthesized_create(pm_parser_t *parser) {
 static pm_undef_node_t *
 pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
     assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
-    pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
-
-    *node = (pm_undef_node_t) {
-        {
-            .type = PM_UNDEF_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_TOKEN_VALUE(token),
-        },
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
-        .names = { 0 }
-    };
 
-    return node;
+    return pm_undef_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, token),
+        ((pm_node_list_t) { 0 }),
+        TOK2LOC(parser, token)
+    );
 }
 
 /**
  * Append a name to an undef node.
  */
 static void
-pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
-    node->base.location.end = name->location.end;
-    pm_node_list_append(&node->names, name);
+pm_undef_node_append(pm_arena_t *arena, pm_undef_node_t *node, pm_node_t *name) {
+    PM_NODE_LENGTH_SET_NODE(node, name);
+    pm_node_list_append(arena, &node->names, name);
 }
 
 /**
@@ -7610,34 +6944,20 @@ pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
 static pm_unless_node_t *
 pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
     pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
-    pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
-
-    const uint8_t *end;
-    if (statements != NULL) {
-        end = statements->base.location.end;
-    } else {
-        end = predicate->location.end;
-    }
-
-    *node = (pm_unless_node_t) {
-        {
-            .type = PM_UNLESS_NODE,
-            .flags = PM_NODE_FLAG_NEWLINE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = keyword->start,
-                .end = end
-            },
-        },
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
-        .predicate = predicate,
-        .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
-        .statements = statements,
-        .else_clause = NULL,
-        .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
-    };
-
-    return node;
+    pm_node_t *end = statements == NULL ? predicate : UP(statements);
+
+    return pm_unless_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_NEWLINE,
+        PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, end),
+        TOK2LOC(parser, keyword),
+        predicate,
+        NTOK2LOC(parser, then_keyword),
+        statements,
+        NULL,
+        ((pm_location_t) { 0 })
+    );
 }
 
 /**
@@ -7646,36 +6966,28 @@ pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t
 static pm_unless_node_t *
 pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
     pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
-    pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
 
     pm_statements_node_t *statements = pm_statements_node_create(parser);
     pm_statements_node_body_append(parser, statements, statement, true);
 
-    *node = (pm_unless_node_t) {
-        {
-            .type = PM_UNLESS_NODE,
-            .flags = PM_NODE_FLAG_NEWLINE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = statement->location.start,
-                .end = predicate->location.end
-            },
-        },
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
-        .predicate = predicate,
-        .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .statements = statements,
-        .else_clause = NULL,
-        .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
-    };
-
-    return node;
+    return pm_unless_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_NODE_FLAG_NEWLINE,
+        PM_LOCATION_INIT_NODES(statement, predicate),
+        TOK2LOC(parser, unless_keyword),
+        predicate,
+        ((pm_location_t) { 0 }),
+        statements,
+        NULL,
+        ((pm_location_t) { 0 })
+    );
 }
 
-static inline void
-pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
-    node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
-    node->base.location.end = end_keyword->end;
+static PRISM_INLINE void
+pm_unless_node_end_keyword_loc_set(const pm_parser_t *parser, pm_unless_node_t *node, const pm_token_t *end_keyword) {
+    node->end_keyword_loc = TOK2LOC(parser, end_keyword);
+    PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
 }
 
 /**
@@ -7690,7 +7002,7 @@ pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen
     // All of the block exits that we want to remove should be within the
     // statements, and since we are modifying the statements, we shouldn't have
     // to check the end location.
-    const uint8_t *start = statements->base.location.start;
+    uint32_t start = statements->base.location.start;
 
     for (size_t index = parser->current_block_exits->size; index > 0; index--) {
         pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
@@ -7706,27 +7018,19 @@ pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen
  */
 static pm_until_node_t *
 pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
-    pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
     pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
 
-    *node = (pm_until_node_t) {
-        {
-            .type = PM_UNTIL_NODE,
-            .flags = flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = keyword->start,
-                .end = closing->end,
-            },
-        },
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
-        .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
-        .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
-        .predicate = predicate,
-        .statements = statements
-    };
-
-    return node;
+    return pm_until_node_new(
+        parser->arena,
+        ++parser->node_id,
+        flags,
+        PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
+        TOK2LOC(parser, keyword),
+        NTOK2LOC(parser, do_keyword),
+        TOK2LOC(parser, closing),
+        predicate,
+        statements
+    );
 }
 
 /**
@@ -7734,28 +7038,20 @@ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
  */
 static pm_until_node_t *
 pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
-    pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
     pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
     pm_loop_modifier_block_exits(parser, statements);
 
-    *node = (pm_until_node_t) {
-        {
-            .type = PM_UNTIL_NODE,
-            .flags = flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = statements->base.location.start,
-                .end = predicate->location.end,
-            },
-        },
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
-        .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .predicate = predicate,
-        .statements = statements
-    };
-
-    return node;
+    return pm_until_node_new(
+        parser->arena,
+        ++parser->node_id,
+        flags,
+        PM_LOCATION_INIT_NODES(statements, predicate),
+        TOK2LOC(parser, keyword),
+        ((pm_location_t) { 0 }),
+        ((pm_location_t) { 0 }),
+        predicate,
+        statements
+    );
 }
 
 /**
@@ -7763,42 +7059,34 @@ pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
  */
 static pm_when_node_t *
 pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
-    pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
-
-    *node = (pm_when_node_t) {
-        {
-            .type = PM_WHEN_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = keyword->start,
-                .end = NULL
-            }
-        },
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
-        .statements = NULL,
-        .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .conditions = { 0 }
-    };
-
-    return node;
+    return pm_when_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_TOKEN(parser, keyword),
+        TOK2LOC(parser, keyword),
+        ((pm_node_list_t) { 0 }),
+        ((pm_location_t) { 0 }),
+        NULL
+    );
 }
 
 /**
  * Append a new condition to a when node.
  */
 static void
-pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
-    node->base.location.end = condition->location.end;
-    pm_node_list_append(&node->conditions, condition);
+pm_when_node_conditions_append(pm_arena_t *arena, pm_when_node_t *node, pm_node_t *condition) {
+    PM_NODE_LENGTH_SET_NODE(node, condition);
+    pm_node_list_append(arena, &node->conditions, condition);
 }
 
 /**
  * Set the location of the then keyword of a when node.
  */
-static inline void
-pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
-    node->base.location.end = then_keyword->end;
-    node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
+static PRISM_INLINE void
+pm_when_node_then_keyword_loc_set(const pm_parser_t *parser, pm_when_node_t *node, const pm_token_t *then_keyword) {
+    PM_NODE_LENGTH_SET_TOKEN(parser, node, then_keyword);
+    node->then_keyword_loc = TOK2LOC(parser, then_keyword);
 }
 
 /**
@@ -7806,8 +7094,8 @@ pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_k
  */
 static void
 pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
-    if (statements->base.location.end > node->base.location.end) {
-        node->base.location.end = statements->base.location.end;
+    if (PM_NODE_END(statements) > PM_NODE_END(node)) {
+        PM_NODE_LENGTH_SET_NODE(node, statements);
     }
 
     node->statements = statements;
@@ -7818,27 +7106,19 @@ pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statemen
  */
 static pm_while_node_t *
 pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
-    pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
     pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
 
-    *node = (pm_while_node_t) {
-        {
-            .type = PM_WHILE_NODE,
-            .flags = flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = keyword->start,
-                .end = closing->end
-            },
-        },
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
-        .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
-        .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
-        .predicate = predicate,
-        .statements = statements
-    };
-
-    return node;
+    return pm_while_node_new(
+        parser->arena,
+        ++parser->node_id,
+        flags,
+        PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
+        TOK2LOC(parser, keyword),
+        NTOK2LOC(parser, do_keyword),
+        TOK2LOC(parser, closing),
+        predicate,
+        statements
+    );
 }
 
 /**
@@ -7846,28 +7126,20 @@ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
  */
 static pm_while_node_t *
 pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
-    pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
     pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
     pm_loop_modifier_block_exits(parser, statements);
 
-    *node = (pm_while_node_t) {
-        {
-            .type = PM_WHILE_NODE,
-            .flags = flags,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = statements->base.location.start,
-                .end = predicate->location.end
-            },
-        },
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
-        .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .predicate = predicate,
-        .statements = statements
-    };
-
-    return node;
+    return pm_while_node_new(
+        parser->arena,
+        ++parser->node_id,
+        flags,
+        PM_LOCATION_INIT_NODES(statements, predicate),
+        TOK2LOC(parser, keyword),
+        ((pm_location_t) { 0 }),
+        ((pm_location_t) { 0 }),
+        predicate,
+        statements
+    );
 }
 
 /**
@@ -7875,22 +7147,17 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
  */
 static pm_while_node_t *
 pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
-    pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
-
-    *node = (pm_while_node_t) {
-        {
-            .type = PM_WHILE_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = PM_LOCATION_NULL_VALUE(parser)
-        },
-        .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
-        .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
-        .closing_loc = PM_LOCATION_NULL_VALUE(parser),
-        .predicate = predicate,
-        .statements = statements
-    };
-
-    return node;
+    return pm_while_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        PM_LOCATION_INIT_UNSET,
+        ((pm_location_t) { 0 }),
+        ((pm_location_t) { 0 }),
+        ((pm_location_t) { 0 }),
+        predicate,
+        statements
+    );
 }
 
 /**
@@ -7899,31 +7166,22 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
  */
 static pm_x_string_node_t *
 pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
-    pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
-
-    *node = (pm_x_string_node_t) {
-        {
-            .type = PM_X_STRING_NODE,
-            .flags = PM_STRING_FLAGS_FROZEN,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = opening->start,
-                .end = closing->end
-            },
-        },
-        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
-        .content_loc = PM_LOCATION_TOKEN_VALUE(content),
-        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
-        .unescaped = *unescaped
-    };
-
-    return node;
+    return pm_x_string_node_new(
+        parser->arena,
+        ++parser->node_id,
+        PM_STRING_FLAGS_FROZEN,
+        PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+        TOK2LOC(parser, opening),
+        TOK2LOC(parser, content),
+        TOK2LOC(parser, closing),
+        *unescaped
+    );
 }
 
 /**
  * Allocate and initialize a new XStringNode node.
  */
-static inline pm_x_string_node_t *
+static PRISM_INLINE pm_x_string_node_t *
 pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
     return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
 }
@@ -7933,40 +7191,31 @@ pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_
  */
 static pm_yield_node_t *
 pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
-    pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
+    uint32_t start = PM_TOKEN_START(parser, keyword);
+    uint32_t end;
 
-    const uint8_t *end;
-    if (rparen_loc->start != NULL) {
-        end = rparen_loc->end;
+    if (rparen_loc->length > 0) {
+        end = PM_LOCATION_END(rparen_loc);
     } else if (arguments != NULL) {
-        end = arguments->base.location.end;
-    } else if (lparen_loc->start != NULL) {
-        end = lparen_loc->end;
+        end = PM_NODE_END(arguments);
+    } else if (lparen_loc->length > 0) {
+        end = PM_LOCATION_END(lparen_loc);
     } else {
-        end = keyword->end;
-    }
-
-    *node = (pm_yield_node_t) {
-        {
-            .type = PM_YIELD_NODE,
-            .node_id = PM_NODE_IDENTIFY(parser),
-            .location = {
-                .start = keyword->start,
-                .end = end
-            },
-        },
-        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
-        .lparen_loc = *lparen_loc,
-        .arguments = arguments,
-        .rparen_loc = *rparen_loc
-    };
-
-    return node;
+        end = PM_TOKEN_END(parser, keyword);
+    }
+
+    return pm_yield_node_new(
+        parser->arena,
+        ++parser->node_id,
+        0,
+        ((pm_location_t) { .start = start, .length = U32(end - start) }),
+        TOK2LOC(parser, keyword),
+        *lparen_loc,
+        arguments,
+        *rparen_loc
+    );
 }
 
-#undef PM_NODE_ALLOC
-#undef PM_NODE_IDENTIFY
-
 /**
  * Check if any of the currently visible scopes contain a local variable
  * described by the given constant id.
@@ -7992,7 +7241,7 @@ pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant
  * described by the given token. This function implicitly inserts a constant
  * into the constant pool.
  */
-static inline int
+static PRISM_INLINE int
 pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
     return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
 }
@@ -8000,27 +7249,35 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
 /**
  * Add a constant id to the local table of the current scope.
  */
-static inline void
+static PRISM_INLINE void
 pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
-    pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
+    pm_locals_write(&parser->current_scope->locals, constant_id, U32(start - parser->start), U32(end - start), reads);
 }
 
 /**
  * Add a local variable from a location to the current scope.
  */
 static pm_constant_id_t
-pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
-    pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
+pm_parser_local_add_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
+    pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
     if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
     return constant_id;
 }
 
 /**
+ * Add a local variable described by a pm_location_t to the current scope.
+ */
+static PRISM_INLINE pm_constant_id_t
+pm_parser_local_add_location(pm_parser_t *parser, pm_location_t *location, uint32_t reads) {
+    return pm_parser_local_add_raw(parser, parser->start + location->start, parser->start + location->start + location->length, reads);
+}
+
+/**
  * Add a local variable from a token to the current scope.
  */
-static inline pm_constant_id_t
+static PRISM_INLINE pm_constant_id_t
 pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
-    return pm_parser_local_add_location(parser, token->start, token->end, reads);
+    return pm_parser_local_add_raw(parser, token->start, token->end, reads);
 }
 
 /**
@@ -8054,7 +7311,7 @@ static bool
 pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
     // We want to check whether the parameter name is a numbered parameter or
     // not.
-    pm_refute_numbered_parameter(parser, name->start, name->end);
+    pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, name), PM_TOKEN_LENGTH(name));
 
     // Otherwise we'll fetch the constant id for the parameter name and check
     // whether it's already in the current scope.
@@ -8078,8 +7335,7 @@ pm_parser_scope_pop(pm_parser_t *parser) {
     pm_scope_t *scope = parser->current_scope;
     parser->current_scope = scope->previous;
     pm_locals_free(&scope->locals);
-    pm_node_list_free(&scope->implicit_parameters);
-    xfree(scope);
+    xfree_sized(scope, sizeof(pm_scope_t));
 }
 
 /******************************************************************************/
@@ -8089,7 +7345,7 @@ pm_parser_scope_pop(pm_parser_t *parser) {
 /**
  * Pushes a value onto the stack.
  */
-static inline void
+static PRISM_INLINE void
 pm_state_stack_push(pm_state_stack_t *stack, bool value) {
     *stack = (*stack << 1) | (value & 1);
 }
@@ -8097,7 +7353,7 @@ pm_state_stack_push(pm_state_stack_t *stack, bool value) {
 /**
  * Pops a value off the stack.
  */
-static inline void
+static PRISM_INLINE void
 pm_state_stack_pop(pm_state_stack_t *stack) {
     *stack >>= 1;
 }
@@ -8105,38 +7361,38 @@ pm_state_stack_pop(pm_state_stack_t *stack) {
 /**
  * Returns the value at the top of the stack.
  */
-static inline bool
+static PRISM_INLINE bool
 pm_state_stack_p(const pm_state_stack_t *stack) {
     return *stack & 1;
 }
 
-static inline void
+static PRISM_INLINE void
 pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
     // Use the negation of the value to prevent stack overflow.
     pm_state_stack_push(&parser->accepts_block_stack, !value);
 }
 
-static inline void
+static PRISM_INLINE void
 pm_accepts_block_stack_pop(pm_parser_t *parser) {
     pm_state_stack_pop(&parser->accepts_block_stack);
 }
 
-static inline bool
+static PRISM_INLINE bool
 pm_accepts_block_stack_p(pm_parser_t *parser) {
     return !pm_state_stack_p(&parser->accepts_block_stack);
 }
 
-static inline void
+static PRISM_INLINE void
 pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
     pm_state_stack_push(&parser->do_loop_stack, value);
 }
 
-static inline void
+static PRISM_INLINE void
 pm_do_loop_stack_pop(pm_parser_t *parser) {
     pm_state_stack_pop(&parser->do_loop_stack);
 }
 
-static inline bool
+static PRISM_INLINE bool
 pm_do_loop_stack_p(pm_parser_t *parser) {
     return pm_state_stack_p(&parser->do_loop_stack);
 }
@@ -8149,7 +7405,7 @@ pm_do_loop_stack_p(pm_parser_t *parser) {
  * Get the next character in the source starting from +cursor+. If that position
  * is beyond the end of the source then return '\0'.
  */
-static inline uint8_t
+static PRISM_INLINE uint8_t
 peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
     if (cursor < parser->end) {
         return *cursor;
@@ -8163,7 +7419,7 @@ peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
  * adding the given offset. If that position is beyond the end of the source
  * then return '\0'.
  */
-static inline uint8_t
+static PRISM_INLINE uint8_t
 peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
     return peek_at(parser, parser->current.end + offset);
 }
@@ -8172,7 +7428,7 @@ peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
  * Get the next character in the source starting from parser->current.end. If
  * that position is beyond the end of the source then return '\0'.
  */
-static inline uint8_t
+static PRISM_INLINE uint8_t
 peek(const pm_parser_t *parser) {
     return peek_at(parser, parser->current.end);
 }
@@ -8181,7 +7437,7 @@ peek(const pm_parser_t *parser) {
  * If the character to be read matches the given value, then returns true and
  * advances the current pointer.
  */
-static inline bool
+static PRISM_INLINE bool
 match(pm_parser_t *parser, uint8_t value) {
     if (peek(parser) == value) {
         parser->current.end++;
@@ -8194,7 +7450,7 @@ match(pm_parser_t *parser, uint8_t value) {
  * Return the length of the line ending string starting at +cursor+, or 0 if it
  * is not a line ending. This function is intended to be CRLF/LF agnostic.
  */
-static inline size_t
+static PRISM_INLINE size_t
 match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
     if (peek_at(parser, cursor) == '\n') {
         return 1;
@@ -8210,7 +7466,7 @@ match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
  * `parser->current.end + offset`, or 0 if it is not a line ending. This
  * function is intended to be CRLF/LF agnostic.
  */
-static inline size_t
+static PRISM_INLINE size_t
 match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
     return match_eol_at(parser, parser->current.end + offset);
 }
@@ -8220,7 +7476,7 @@ match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
  * or 0 if it is not a line ending. This function is intended to be CRLF/LF
  * agnostic.
  */
-static inline size_t
+static PRISM_INLINE size_t
 match_eol(pm_parser_t *parser) {
     return match_eol_at(parser, parser->current.end);
 }
@@ -8228,7 +7484,7 @@ match_eol(pm_parser_t *parser) {
 /**
  * Skip to the next newline character or NUL byte.
  */
-static inline const uint8_t *
+static PRISM_INLINE const uint8_t *
 next_newline(const uint8_t *cursor, ptrdiff_t length) {
     assert(length >= 0);
 
@@ -8241,7 +7497,7 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) {
 /**
  * This is equivalent to the predicate of warn_balanced in CRuby.
  */
-static inline bool
+static PRISM_INLINE bool
 ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
     return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
 }
@@ -8319,7 +7575,7 @@ parser_lex_magic_comment_encoding(pm_parser_t *parser) {
         // issue because we didn't understand the encoding that the user was
         // trying to use. In this case we'll keep using the default encoding but
         // add an error to the parser to indicate an unsuccessful parse.
-        pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
+        pm_parser_err(parser, U32(value_start - parser->start), U32(cursor - value_start), PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
     }
 }
 
@@ -8344,7 +7600,7 @@ parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t valu
     }
 }
 
-static inline bool
+static PRISM_INLINE bool
 pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
     return b == '\'' || b == '"' || b == ':' || b == ';';
 }
@@ -8354,13 +7610,15 @@ pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
  * found, it returns a pointer to the start of the marker. Otherwise it returns
  * NULL.
  */
-static inline const uint8_t *
+static PRISM_INLINE const uint8_t *
 parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
-    while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
-        if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
-            return cursor;
+    // Scan for the '*' in the middle of the "-*-" marker rather than for '-':
+    // '*' is rarer in typical comments, so this avoids repeated memchr calls
+    // that stop at every dash in words like "foo-bar".
+    while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor + 1, '*', (size_t) (end - cursor - 1), parser->encoding_changed, parser->encoding)) != NULL) {
+        if (cursor[-1] == '-' && cursor + 1 < end && cursor[1] == '-') {
+            return cursor - 1;
         }
-        cursor++;
     }
     return NULL;
 }
@@ -8375,7 +7633,7 @@ parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor
  * It returns true if it consumes the entire comment. Otherwise it returns
  * false.
  */
-static inline bool
+static PRISM_INLINE bool
 parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
     bool result = true;
 
@@ -8397,11 +7655,24 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
             // have a magic comment.
             return false;
         }
+    } else {
+        // Non-emacs magic comments must contain a colon for `key: value`.
+        // Reject early if there is no colon to avoid scanning the entire
+        // comment character-by-character.
+        if (pm_memchr(start, ':', (size_t) (end - start), parser->encoding_changed, parser->encoding) == NULL) {
+            return false;
+        }
+
+        // Advance start past leading whitespace so the main loop begins
+        // directly at the key, avoiding a redundant whitespace scan.
+        start += pm_strspn_whitespace(start, end - start);
     }
 
     cursor = start;
     while (cursor < end) {
-        while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
+        if (indicator) {
+            while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
+        }
 
         const uint8_t *key_start = cursor;
         while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
@@ -8429,7 +7700,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
                 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
             }
             value_end = cursor;
-            if (*cursor == '"') cursor++;
+            if (cursor < end && *cursor == '"') cursor++;
         } else {
             value_start = cursor;
             while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
@@ -8487,7 +7758,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
                     case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
                         PM_PARSER_WARN_TOKEN_FORMAT(
                             parser,
-                            parser->current,
+                            &parser->current,
                             PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
                             (int) key_length,
                             (const char *) key_source,
@@ -8514,7 +7785,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
                         case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
                             PM_PARSER_WARN_TOKEN_FORMAT(
                                 parser,
-                                parser->current,
+                                &parser->current,
                                 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
                                 (int) key_length,
                                 (const char *) key_source,
@@ -8549,7 +7820,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
                 } else {
                     PM_PARSER_WARN_TOKEN_FORMAT(
                         parser,
-                        parser->current,
+                        &parser->current,
                         PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
                         (int) key_length,
                         (const char *) key_source,
@@ -8562,17 +7833,14 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
 
         // When we're done, we want to free the string in case we had to
         // allocate memory for it.
-        pm_string_free(&key);
+        pm_string_cleanup(&key);
 
         // Allocate a new magic comment node to append to the parser's list.
-        pm_magic_comment_t *magic_comment;
-        if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
-            magic_comment->key_start = key_start;
-            magic_comment->value_start = value_start;
-            magic_comment->key_length = (uint32_t) key_length;
-            magic_comment->value_length = value_length;
-            pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
-        }
+        pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_magic_comment_t), PRISM_ALIGNOF(pm_magic_comment_t));
+        magic_comment->node.next = NULL;
+        magic_comment->key = (pm_location_t) { .start = U32(key_start - parser->start), .length = U32(key_length) };
+        magic_comment->value = (pm_location_t) { .start = U32(value_start - parser->start), .length = value_length };
+        pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
     }
 
     return result;
@@ -8582,85 +7850,67 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
 /* Context manipulations                                                      */
 /******************************************************************************/
 
-static bool
-context_terminator(pm_context_t context, pm_token_t *token) {
-    switch (context) {
-        case PM_CONTEXT_MAIN:
-        case PM_CONTEXT_DEF_PARAMS:
-        case PM_CONTEXT_DEFINED:
-        case PM_CONTEXT_MULTI_TARGET:
-        case PM_CONTEXT_TERNARY:
-        case PM_CONTEXT_RESCUE_MODIFIER:
-            return token->type == PM_TOKEN_EOF;
-        case PM_CONTEXT_DEFAULT_PARAMS:
-            return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT;
-        case PM_CONTEXT_PREEXE:
-        case PM_CONTEXT_POSTEXE:
-            return token->type == PM_TOKEN_BRACE_RIGHT;
-        case PM_CONTEXT_MODULE:
-        case PM_CONTEXT_CLASS:
-        case PM_CONTEXT_SCLASS:
-        case PM_CONTEXT_LAMBDA_DO_END:
-        case PM_CONTEXT_DEF:
-        case PM_CONTEXT_BLOCK_KEYWORDS:
-            return token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ENSURE;
-        case PM_CONTEXT_WHILE:
-        case PM_CONTEXT_UNTIL:
-        case PM_CONTEXT_ELSE:
-        case PM_CONTEXT_FOR:
-        case PM_CONTEXT_BEGIN_ENSURE:
-        case PM_CONTEXT_BLOCK_ENSURE:
-        case PM_CONTEXT_CLASS_ENSURE:
-        case PM_CONTEXT_DEF_ENSURE:
-        case PM_CONTEXT_LAMBDA_ENSURE:
-        case PM_CONTEXT_MODULE_ENSURE:
-        case PM_CONTEXT_SCLASS_ENSURE:
-            return token->type == PM_TOKEN_KEYWORD_END;
-        case PM_CONTEXT_LOOP_PREDICATE:
-            return token->type == PM_TOKEN_KEYWORD_DO || token->type == PM_TOKEN_KEYWORD_THEN;
-        case PM_CONTEXT_FOR_INDEX:
-            return token->type == PM_TOKEN_KEYWORD_IN;
-        case PM_CONTEXT_CASE_WHEN:
-            return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
-        case PM_CONTEXT_CASE_IN:
-            return token->type == PM_TOKEN_KEYWORD_IN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
-        case PM_CONTEXT_IF:
-        case PM_CONTEXT_ELSIF:
-            return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_ELSIF || token->type == PM_TOKEN_KEYWORD_END;
-        case PM_CONTEXT_UNLESS:
-            return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
-        case PM_CONTEXT_EMBEXPR:
-            return token->type == PM_TOKEN_EMBEXPR_END;
-        case PM_CONTEXT_BLOCK_BRACES:
-            return token->type == PM_TOKEN_BRACE_RIGHT;
-        case PM_CONTEXT_PARENS:
-            return token->type == PM_TOKEN_PARENTHESIS_RIGHT;
-        case PM_CONTEXT_BEGIN:
-        case PM_CONTEXT_BEGIN_RESCUE:
-        case PM_CONTEXT_BLOCK_RESCUE:
-        case PM_CONTEXT_CLASS_RESCUE:
-        case PM_CONTEXT_DEF_RESCUE:
-        case PM_CONTEXT_LAMBDA_RESCUE:
-        case PM_CONTEXT_MODULE_RESCUE:
-        case PM_CONTEXT_SCLASS_RESCUE:
-            return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
-        case PM_CONTEXT_BEGIN_ELSE:
-        case PM_CONTEXT_BLOCK_ELSE:
-        case PM_CONTEXT_CLASS_ELSE:
-        case PM_CONTEXT_DEF_ELSE:
-        case PM_CONTEXT_LAMBDA_ELSE:
-        case PM_CONTEXT_MODULE_ELSE:
-        case PM_CONTEXT_SCLASS_ELSE:
-            return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END;
-        case PM_CONTEXT_LAMBDA_BRACES:
-            return token->type == PM_TOKEN_BRACE_RIGHT;
-        case PM_CONTEXT_PREDICATE:
-            return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
-        case PM_CONTEXT_NONE:
-            return false;
-    }
+static const uint32_t context_terminators[] = {
+    [PM_CONTEXT_NONE] = 0,
+    [PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_BEGIN_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_BEGIN_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_BLOCK_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
+    [PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+    [PM_CONTEXT_BLOCK_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_BLOCK_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_BLOCK_PARAMETERS] = (1U << PM_TOKEN_PIPE),
+    [PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
+    [PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
+    [PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+    [PM_CONTEXT_CLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_CLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+    [PM_CONTEXT_DEF_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_DEF_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_DEF_PARAMS] = (1U << PM_TOKEN_EOF),
+    [PM_CONTEXT_DEFINED] = (1U << PM_TOKEN_EOF),
+    [PM_CONTEXT_DEFAULT_PARAMS] = (1U << PM_TOKEN_COMMA) | (1U << PM_TOKEN_PARENTHESIS_RIGHT),
+    [PM_CONTEXT_ELSE] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_EMBEXPR] = (1U << PM_TOKEN_EMBEXPR_END),
+    [PM_CONTEXT_FOR] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_FOR_INDEX] = (1U << PM_TOKEN_KEYWORD_IN),
+    [PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_LAMBDA_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
+    [PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+    [PM_CONTEXT_LAMBDA_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_LAMBDA_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_LOOP_PREDICATE] = (1U << PM_TOKEN_KEYWORD_DO) | (1U << PM_TOKEN_KEYWORD_THEN),
+    [PM_CONTEXT_MAIN] = (1U << PM_TOKEN_EOF),
+    [PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+    [PM_CONTEXT_MODULE_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_MODULE_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_MULTI_TARGET] = (1U << PM_TOKEN_EOF),
+    [PM_CONTEXT_PARENS] = (1U << PM_TOKEN_PARENTHESIS_RIGHT),
+    [PM_CONTEXT_POSTEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
+    [PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON),
+    [PM_CONTEXT_PREEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
+    [PM_CONTEXT_RESCUE_MODIFIER] = (1U << PM_TOKEN_EOF),
+    [PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+    [PM_CONTEXT_SCLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_SCLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_TERNARY] = (1U << PM_TOKEN_EOF),
+    [PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_UNTIL] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END),
+};
 
-    return false;
+static PRISM_INLINE bool
+context_terminator(pm_context_t context, pm_token_t *token) {
+    return token->type < 32 && (context_terminators[context] & (1U << token->type));
 }
 
 /**
@@ -8699,7 +7949,7 @@ context_push(pm_parser_t *parser, pm_context_t context) {
 static void
 context_pop(pm_parser_t *parser) {
     pm_context_node_t *prev = parser->current_context->prev;
-    xfree(parser->current_context);
+    xfree_sized(parser->current_context, sizeof(pm_context_node_t));
     parser->current_context = prev;
 }
 
@@ -8761,6 +8011,7 @@ context_human(pm_context_t context) {
         case PM_CONTEXT_BEGIN: return "begin statement";
         case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
         case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
+        case PM_CONTEXT_BLOCK_PARAMETERS: return "'|'..'|' block parameter";
         case PM_CONTEXT_CASE_WHEN: return "'when' clause";
         case PM_CONTEXT_CASE_IN: return "'in' clause";
         case PM_CONTEXT_CLASS: return "class definition";
@@ -8821,11 +8072,11 @@ context_human(pm_context_t context) {
 /* Specific token lexers                                                      */
 /******************************************************************************/
 
-static inline void
+static PRISM_INLINE void
 pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
     if (invalid != NULL) {
         pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
-        pm_parser_err(parser, invalid, invalid + 1, diag_id);
+        pm_parser_err(parser, U32(invalid - parser->start), 1, diag_id);
     }
 }
 
@@ -8936,7 +8187,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
                     pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
                 }
 
-                parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
+                parser->integer.base = PM_INTEGER_BASE_FLAGS_BINARY;
                 break;
 
             // 0o1111 is an octal number
@@ -8950,7 +8201,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
                     pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
                 }
 
-                parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
+                parser->integer.base = PM_INTEGER_BASE_FLAGS_OCTAL;
                 break;
 
             // 01111 is an octal number
@@ -8964,7 +8215,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
             case '6':
             case '7':
                 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
-                parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
+                parser->integer.base = PM_INTEGER_BASE_FLAGS_OCTAL;
                 break;
 
             // 0x1111 is a hexadecimal number
@@ -8978,7 +8229,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
                     pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
                 }
 
-                parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
+                parser->integer.base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
                 break;
 
             // 0.xxx is a float
@@ -8996,11 +8247,62 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
         }
     } else {
         // If it didn't start with a 0, then we'll lex as far as we can into a
-        // decimal number.
-        parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
+        // decimal number. We compute the integer value inline to avoid
+        // re-scanning the digits later in pm_integer_parse.
+        {
+            const uint8_t *cursor = parser->current.end;
+            const uint8_t *end = parser->end;
+            uint64_t value = (uint64_t) (cursor[-1] - '0');
+
+            bool has_underscore = false;
+            bool prev_underscore = false;
+            const uint8_t *invalid = NULL;
+
+            while (cursor < end) {
+                uint8_t c = *cursor;
+                if (c >= '0' && c <= '9') {
+                    if (value <= UINT32_MAX) value = value * 10 + (uint64_t) (c - '0');
+                    prev_underscore = false;
+                    cursor++;
+                } else if (c == '_') {
+                    has_underscore = true;
+                    if (prev_underscore && invalid == NULL) invalid = cursor;
+                    prev_underscore = true;
+                    cursor++;
+                } else {
+                    break;
+                }
+            }
+
+            if (has_underscore) {
+                if (prev_underscore && invalid == NULL) invalid = cursor - 1;
+                pm_strspn_number_validate(parser, parser->current.end, (size_t) (cursor - parser->current.end), invalid);
+            }
+
+            if (value <= UINT32_MAX) {
+                parser->integer.value = (uint32_t) value;
+                parser->integer.lexed = true;
+            }
+
+            parser->current.end = cursor;
+        }
 
         // Afterward, we'll lex as far as we can into an optional float suffix.
-        type = lex_optional_float_suffix(parser, seen_e);
+        // Guard the function call: the vast majority of decimal numbers are
+        // plain integers, so avoid the call when the next byte cannot start a
+        // float suffix.
+        {
+            uint8_t next = peek(parser);
+            if (next == '.' || next == 'e' || next == 'E') {
+                type = lex_optional_float_suffix(parser, seen_e);
+
+                // If it turned out to be a float, the cached integer value is
+                // invalid.
+                if (type != PM_TOKEN_INTEGER) {
+                    parser->integer.lexed = false;
+                }
+            }
+        }
     }
 
     // At this point we have a completed number, but we want to provide the user
@@ -9010,7 +8312,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
         const uint8_t *fraction_start = parser->current.end;
         const uint8_t *fraction_end = parser->current.end + 2;
         fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
-        pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
+        pm_parser_err(parser, U32(fraction_start - parser->start), U32(fraction_end - fraction_start), PM_ERR_INVALID_NUMBER_FRACTION);
     }
 
     return type;
@@ -9019,7 +8321,8 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
 static pm_token_type_t
 lex_numeric(pm_parser_t *parser) {
     pm_token_type_t type = PM_TOKEN_INTEGER;
-    parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
+    parser->integer.base = PM_INTEGER_BASE_FLAGS_DECIMAL;
+    parser->integer.lexed = false;
 
     if (parser->current.end < parser->end) {
         bool seen_e = false;
@@ -9109,8 +8412,8 @@ lex_global_variable(pm_parser_t *parser) {
                 } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
 
                 // $0 isn't allowed to be followed by anything.
-                pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
-                PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
+                pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
+                PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, diag_id);
             }
 
             return PM_TOKEN_GLOBAL_VARIABLE;
@@ -9146,9 +8449,9 @@ lex_global_variable(pm_parser_t *parser) {
             } else {
                 // If we get here, then we have a $ followed by something that
                 // isn't recognized as a global variable.
-                pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
-                const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
-                PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
+                pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
+                size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+                PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), diag_id, (int) (PM_TOKEN_LENGTH(&parser->current) + U32(width)), (const char *) parser->current.start);
             }
 
             return PM_TOKEN_GLOBAL_VARIABLE;
@@ -9168,7 +8471,7 @@ lex_global_variable(pm_parser_t *parser) {
  * * `type` - the expected token type
  * * `modifier_type` - the expected modifier token type
  */
-static inline pm_token_type_t
+static PRISM_INLINE pm_token_type_t
 lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
     if (memcmp(current_start, value, vlen) == 0) {
         pm_lex_state_t last_state = parser->lex_state;
@@ -9207,6 +8510,10 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
             current_end += width;
         }
     } else {
+        // Fast path: scan ASCII identifier bytes using wide operations.
+        current_end += scan_identifier_ascii(current_end, end);
+
+        // Byte-at-a-time fallback for the tail and any UTF-8 sequences.
         while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
             current_end += width;
         }
@@ -9266,9 +8573,15 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
         switch (width) {
             case 2:
                 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
+                    if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
+                        return PM_TOKEN_KEYWORD_DO;
+                    }
                     if (pm_do_loop_stack_p(parser)) {
                         return PM_TOKEN_KEYWORD_DO_LOOP;
                     }
+                    if (!pm_accepts_block_stack_p(parser)) {
+                        return PM_TOKEN_KEYWORD_DO_BLOCK;
+                    }
                     return PM_TOKEN_KEYWORD_DO;
                 }
 
@@ -9347,8 +8660,8 @@ current_token_starts_line(pm_parser_t *parser) {
  * handle interpolation. This function performs that check. It returns a token
  * type representing what it found. Those cases are:
  *
- * * PM_TOKEN_NOT_PROVIDED - No interpolation was found at this point. The
- *     caller should keep lexing.
+ * * 0 - No interpolation was found at this point. The caller should keep
+ *     lexing.
  * * PM_TOKEN_STRING_CONTENT - No interpolation was found at this point. The
  *     caller should return this token type.
  * * PM_TOKEN_EMBEXPR_BEGIN - An embedded expression was found. The caller
@@ -9365,9 +8678,9 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
         return PM_TOKEN_STRING_CONTENT;
     }
 
-    // Now we'll check against the character that follows the #. If it constitutes
-    // valid interplation, we'll handle that, otherwise we'll return
-    // PM_TOKEN_NOT_PROVIDED.
+    // Now we'll check against the character that follows the #. If it
+    // constitutes valid interpolation, we'll handle that, otherwise we'll
+    // return 0.
     switch (pound[1]) {
         case '@': {
             // In this case we may have hit an embedded instance or class variable.
@@ -9401,7 +8714,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
             // string content. This is like if we get "#@-". In this case the caller
             // should keep lexing.
             parser->current.end = pound + 1;
-            return PM_TOKEN_NOT_PROVIDED;
+            return 0;
         }
         case '$':
             // In this case we may have hit an embedded global variable. If there's
@@ -9451,7 +8764,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
             // In this case we've hit a #$ that does not indicate a global variable.
             // In this case we'll continue lexing past it.
             parser->current.end = pound + 1;
-            return PM_TOKEN_NOT_PROVIDED;
+            return 0;
         case '{':
             // In this case it's the start of an embedded expression. If we have
             // already consumed content, then we need to return that content as string
@@ -9475,7 +8788,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
             // mark that by returning the not provided token type. This tells the
             // consumer to keep lexing forward.
             parser->current.end = pound + 1;
-            return PM_TOKEN_NOT_PROVIDED;
+            return 0;
     }
 }
 
@@ -9499,7 +8812,7 @@ static const bool ascii_printable_chars[] = {
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
 };
 
-static inline bool
+static PRISM_INLINE bool
 char_is_ascii_printable(const uint8_t b) {
     return (b < 0x80) && ascii_printable_chars[b];
 }
@@ -9508,7 +8821,7 @@ char_is_ascii_printable(const uint8_t b) {
  * Return the value that a hexadecimal digit character represents. For example,
  * transform 'a' into 10, 'b' into 11, etc.
  */
-static inline uint8_t
+static PRISM_INLINE uint8_t
 escape_hexadecimal_digit(const uint8_t value) {
     return (uint8_t) ((value <= '9') ? (value - '0') : (value & 0x7) + 9);
 }
@@ -9518,8 +8831,8 @@ escape_hexadecimal_digit(const uint8_t value) {
  * digits scanned. This function assumes that the characters have already been
  * validated.
  */
-static inline uint32_t
-escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
+static PRISM_INLINE uint32_t
+escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const pm_location_t *error_location, const uint8_t flags) {
     uint32_t value = 0;
     for (size_t index = 0; index < length; index++) {
         if (index != 0) value <<= 4;
@@ -9529,7 +8842,14 @@ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
     // Here we're going to verify that the value is actually a valid Unicode
     // codepoint and not a surrogate pair.
     if (value >= 0xD800 && value <= 0xDFFF) {
-        pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
+        if (flags & PM_ESCAPE_FLAG_REGEXP) {
+            // In regexp context, defer the error to regexp encoding
+            // validation where we can produce a regexp-specific message.
+        } else if (error_location != NULL) {
+            pm_parser_err(parser, error_location->start, error_location->length, PM_ERR_ESCAPE_INVALID_UNICODE);
+        } else {
+            pm_parser_err(parser, U32(string - parser->start), U32(length), PM_ERR_ESCAPE_INVALID_UNICODE);
+        }
         return 0xFFFD;
     }
 
@@ -9539,7 +8859,7 @@ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
 /**
  * Escape a single character value based on the given flags.
  */
-static inline uint8_t
+static PRISM_INLINE uint8_t
 escape_byte(uint8_t value, const uint8_t flags) {
     if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
     if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
@@ -9549,21 +8869,32 @@ escape_byte(uint8_t value, const uint8_t flags) {
 /**
  * Write a unicode codepoint to the given buffer.
  */
-static inline void
+static PRISM_INLINE void
 escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
     // \u escape sequences in string-like structures implicitly change the
     // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
     // literal.
     if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
         if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
-            PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
+            if (flags & PM_ESCAPE_FLAG_REGEXP) {
+                // In regexp context, suppress this error — the regexp encoding
+                // validation will produce a more specific error message.
+            } else {
+                PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(end - start), PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
+            }
         }
 
         parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY;
     }
 
     if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
-        pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
+        if (flags & PM_ESCAPE_FLAG_REGEXP) {
+            // In regexp context, defer the error to the regexp encoding
+            // validation which produces a regexp-specific message.
+        } else {
+            pm_parser_err(parser, U32(start - parser->start), U32(end - start), PM_ERR_ESCAPE_INVALID_UNICODE);
+        }
+
         pm_buffer_append_byte(buffer, 0xEF);
         pm_buffer_append_byte(buffer, 0xBF);
         pm_buffer_append_byte(buffer, 0xBD);
@@ -9574,11 +8905,16 @@ escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t fla
  * When you're writing a byte to the unescape buffer, if the byte is non-ASCII
  * (i.e., the top bit is set) then it locks in the encoding.
  */
-static inline void
-escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
+static PRISM_INLINE void
+escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, uint8_t byte) {
     if (byte >= 0x80) {
         if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
-            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
+            if (flags & PM_ESCAPE_FLAG_REGEXP) {
+                // In regexp context, suppress this error — the regexp encoding
+                // validation will produce a more specific error message.
+            } else {
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
+            }
         }
 
         parser->explicit_encoding = parser->encoding;
@@ -9602,19 +8938,19 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
  * Note that in this case there is a literal \ byte in the regular expression
  * source so that the regular expression engine will perform its own unescaping.
  */
-static inline void
+static PRISM_INLINE void
 escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
     if (flags & PM_ESCAPE_FLAG_REGEXP) {
         pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
     }
 
-    escape_write_byte_encoded(parser, buffer, byte);
+    escape_write_byte_encoded(parser, buffer, flags, byte);
 }
 
 /**
  * Write each byte of the given escaped character into the buffer.
  */
-static inline void
+static PRISM_INLINE void
 escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
     size_t width;
     if (parser->encoding_changed) {
@@ -9624,6 +8960,7 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_
     }
 
     if (width == 1) {
+        if (*parser->current.end == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
         escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
     } else if (width > 1) {
         // Valid multibyte character.  Just ignore escape.
@@ -9649,7 +8986,7 @@ escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *t
 
     PM_PARSER_WARN_TOKEN_FORMAT(
         parser,
-        parser->current,
+        &parser->current,
         PM_WARN_INVALID_CHARACTER,
         FLAG(flags),
         FLAG(flag),
@@ -9764,7 +9101,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
                     }
                 }
 
-                escape_write_byte_encoded(parser, buffer, value);
+                escape_write_byte_encoded(parser, buffer, flags, value);
             } else {
                 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
             }
@@ -9777,7 +9114,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
 
             if (parser->current.end == parser->end) {
                 const uint8_t *start = parser->current.end - 2;
-                PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
+                PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
             } else if (peek(parser) == '{') {
                 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
                 parser->current.end++;
@@ -9806,18 +9143,19 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
 
                     if (hexadecimal_length > 6) {
                         // \u{nnnn} character literal allows only 1-6 hexadecimal digits
-                        pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
+                        pm_parser_err(parser, U32(unicode_start - parser->start), U32(hexadecimal_length), PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
                     } else if (hexadecimal_length == 0) {
                         // there are not hexadecimal characters
 
                         if (flags & PM_ESCAPE_FLAG_REGEXP) {
                             // If this is a regular expression, we are going to
                             // let the regular expression engine handle this
-                            // error instead of us.
+                            // error instead of us because we don't know at this
+                            // point if we're inside a comment in /x mode.
                             pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
                         } else {
-                            pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
-                            pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
+                            pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE);
+                            pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
                         }
 
                         return;
@@ -9829,7 +9167,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
                         extra_codepoints_start = unicode_start;
                     }
 
-                    uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
+                    uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length, NULL, flags);
                     escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
 
                     parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
@@ -9838,21 +9176,22 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
                 // ?\u{nnnn} character literal should contain only one codepoint
                 // and cannot be like ?\u{nnnn mmmm}.
                 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
-                    pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
+                    pm_parser_err(parser, U32(extra_codepoints_start - parser->start), U32(parser->current.end - 1 - extra_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
                 }
 
                 if (parser->current.end == parser->end) {
-                    PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
+                    PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
                 } else if (peek(parser) == '}') {
                     parser->current.end++;
                 } else {
                     if (flags & PM_ESCAPE_FLAG_REGEXP) {
                         // If this is a regular expression, we are going to let
                         // the regular expression engine handle this error
-                        // instead of us.
+                        // instead of us because we don't know at this point if
+                        // we're inside a comment in /x mode.
                         pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
                     } else {
-                        pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
+                        pm_parser_err(parser, U32(unicode_codepoints_start - parser->start), U32(parser->current.end - unicode_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
                     }
                 }
 
@@ -9867,10 +9206,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
                         pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
                     } else {
                         const uint8_t *start = parser->current.end - 2;
-                        PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
+                        PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
                     }
                 } else if (length == 4) {
-                    uint32_t value = escape_unicode(parser, parser->current.end, 4);
+                    uint32_t value = escape_unicode(parser, parser->current.end, 4, NULL, flags);
 
                     if (flags & PM_ESCAPE_FLAG_REGEXP) {
                         pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
@@ -9916,7 +9255,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
                     parser->current.end++;
 
                     if (match(parser, 'u') || match(parser, 'U')) {
-                        pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
+                        pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
                         return;
                     }
 
@@ -9938,6 +9277,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
                         return;
                     }
 
+                    if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
                     parser->current.end++;
                     escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
                     return;
@@ -9952,7 +9292,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
 
             if (peek(parser) != '-') {
                 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
-                pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
+                pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
                 return;
             }
 
@@ -9973,7 +9313,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
                     parser->current.end++;
 
                     if (match(parser, 'u') || match(parser, 'U')) {
-                        pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
+                        pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
                         return;
                     }
 
@@ -9992,10 +9332,11 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
                 default: {
                     if (!char_is_ascii_printable(peeked)) {
                         size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
-                        pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
+                        pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
                         return;
                     }
 
+                    if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
                     parser->current.end++;
                     escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
                     return;
@@ -10010,7 +9351,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
 
             if (peek(parser) != '-') {
                 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
-                pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
+                pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
                 return;
             }
 
@@ -10026,7 +9367,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
                     parser->current.end++;
 
                     if (match(parser, 'u') || match(parser, 'U')) {
-                        pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
+                        pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
                         return;
                     }
 
@@ -10045,10 +9386,11 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
                 default:
                     if (!char_is_ascii_printable(peeked)) {
                         size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
-                        pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
+                        pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
                         return;
                     }
 
+                    if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
                     parser->current.end++;
                     escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
                     return;
@@ -10056,8 +9398,9 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
         }
         case '\r': {
             if (peek_offset(parser, 1) == '\n') {
+                pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 2);
                 parser->current.end += 2;
-                escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
+                escape_write_byte_encoded(parser, buffer, flags, escape_byte('\n', flags));
                 return;
             }
             PRISM_FALLTHROUGH
@@ -10065,7 +9408,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
         default: {
             if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
                 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
-                pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
+                pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
                 return;
             }
             if (parser->current.end < parser->end) {
@@ -10127,10 +9470,14 @@ lex_question_mark(pm_parser_t *parser) {
         lex_state_set(parser, PM_LEX_STATE_END);
 
         pm_buffer_t buffer;
-        pm_buffer_init_capacity(&buffer, 3);
+        pm_buffer_init(&buffer, 3);
 
         escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
-        pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
+
+        // Copy buffer data into the arena and free the heap buffer.
+        void *arena_data = pm_arena_memdup(parser->arena, buffer.value, buffer.length, PRISM_ALIGNOF(uint8_t));
+        pm_string_constant_init(&parser->current_string, (const char *) arena_data, buffer.length);
+        pm_buffer_cleanup(&buffer);
 
         return PM_TOKEN_CHARACTER_LITERAL;
     } else {
@@ -10173,12 +9520,12 @@ lex_at_variable(pm_parser_t *parser) {
         }
     } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
         pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
-        if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
+        if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
             diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
         }
 
         size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
-        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
+        PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
     } else {
         pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
         pm_parser_err_token(parser, &parser->current, diag_id);
@@ -10196,24 +9543,23 @@ lex_at_variable(pm_parser_t *parser) {
 /**
  * Optionally call out to the lex callback if one is provided.
  */
-static inline void
+static PRISM_INLINE void
 parser_lex_callback(pm_parser_t *parser) {
-    if (parser->lex_callback) {
-        parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
+    if (parser->lex_callback.callback) {
+        parser->lex_callback.callback(parser, &parser->current, parser->lex_callback.data);
     }
 }
 
 /**
  * Return a new comment node of the specified type.
  */
-static inline pm_comment_t *
+static PRISM_INLINE pm_comment_t *
 parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
-    pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
-    if (comment == NULL) return NULL;
+    pm_comment_t *comment = (pm_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_comment_t), PRISM_ALIGNOF(pm_comment_t));
 
     *comment = (pm_comment_t) {
         .type = type,
-        .location = { parser->current.start, parser->current.end }
+        .location = TOK2LOC(parser, &parser->current)
     };
 
     return comment;
@@ -10232,7 +9578,7 @@ lex_embdoc(pm_parser_t *parser) {
     if (newline == NULL) {
         parser->current.end = parser->end;
     } else {
-        pm_newline_list_append(&parser->newline_list, newline);
+        pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
         parser->current.end = newline + 1;
     }
 
@@ -10240,8 +9586,8 @@ lex_embdoc(pm_parser_t *parser) {
     parser_lex_callback(parser);
 
     // Now, create a comment that is going to be attached to the parser.
+    const uint8_t *comment_start = parser->current.start;
     pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
-    if (comment == NULL) return PM_TOKEN_EOF;
 
     // Now, loop until we find the end of the embedded documentation or the end
     // of the file.
@@ -10265,14 +9611,14 @@ lex_embdoc(pm_parser_t *parser) {
             if (newline == NULL) {
                 parser->current.end = parser->end;
             } else {
-                pm_newline_list_append(&parser->newline_list, newline);
+                pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
                 parser->current.end = newline + 1;
             }
 
             parser->current.type = PM_TOKEN_EMBDOC_END;
             parser_lex_callback(parser);
 
-            comment->location.end = parser->current.end;
+            comment->location.length = (uint32_t) (parser->current.end - comment_start);
             pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
 
             return PM_TOKEN_EMBDOC_END;
@@ -10285,7 +9631,7 @@ lex_embdoc(pm_parser_t *parser) {
         if (newline == NULL) {
             parser->current.end = parser->end;
         } else {
-            pm_newline_list_append(&parser->newline_list, newline);
+            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
             parser->current.end = newline + 1;
         }
 
@@ -10295,7 +9641,7 @@ lex_embdoc(pm_parser_t *parser) {
 
     pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
 
-    comment->location.end = parser->current.end;
+    comment->location.length = (uint32_t) (parser->current.end - comment_start);
     pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
 
     return PM_TOKEN_EOF;
@@ -10306,7 +9652,7 @@ lex_embdoc(pm_parser_t *parser) {
  * This happens in a couple places depending on whether or not we have already
  * lexed a comment.
  */
-static inline void
+static PRISM_INLINE void
 parser_lex_ignored_newline(pm_parser_t *parser) {
     parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
     parser_lex_callback(parser);
@@ -10321,7 +9667,7 @@ parser_lex_ignored_newline(pm_parser_t *parser) {
  * If it is set, then we need to skip past the heredoc body and then clear the
  * heredoc_end field.
  */
-static inline void
+static PRISM_INLINE void
 parser_flush_heredoc_end(pm_parser_t *parser) {
     assert(parser->heredoc_end <= parser->end);
     parser->next_start = parser->heredoc_end;
@@ -10397,12 +9743,12 @@ typedef struct {
 /**
  * Push the given byte into the token buffer.
  */
-static inline void
+static PRISM_INLINE void
 pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
     pm_buffer_append_byte(&token_buffer->buffer, byte);
 }
 
-static inline void
+static PRISM_INLINE void
 pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
     pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
 }
@@ -10410,7 +9756,7 @@ pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t
 /**
  * Return the width of the character at the end of the current token.
  */
-static inline size_t
+static PRISM_INLINE size_t
 parser_char_width(const pm_parser_t *parser) {
     size_t width;
     if (parser->encoding_changed) {
@@ -10437,36 +9783,31 @@ pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parse
 static void
 pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
     size_t width = parser_char_width(parser);
-    pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
-    pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
+    const uint8_t *start = parser->current.end;
+    pm_buffer_append_bytes(&token_buffer->base.buffer, start, width);
+    pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, width);
     parser->current.end += width;
 }
 
-static bool
-pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
-    for (size_t index = 0; index < length; index++) {
-        if (value[index] & 0x80) return false;
-    }
-
-    return true;
-}
-
 /**
  * When we're about to return from lexing the current token and we know for sure
  * that we have found an escape sequence, this function is called to copy the
  * contents of the token buffer into the current string on the parser so that it
  * can be attached to the correct node.
  */
-static inline void
+static PRISM_INLINE void
 pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
-    pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
+    // Copy buffer data into the arena and free the heap buffer.
+    size_t len = pm_buffer_length(&token_buffer->buffer);
+    void *arena_data = pm_arena_memdup(parser->arena, pm_buffer_value(&token_buffer->buffer), len, PRISM_ALIGNOF(uint8_t));
+    pm_string_constant_init(&parser->current_string, (const char *) arena_data, len);
+    pm_buffer_cleanup(&token_buffer->buffer);
 }
 
-static inline void
+static PRISM_INLINE void
 pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
-    pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
-    parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
-    pm_buffer_free(&token_buffer->regexp_buffer);
+    pm_token_buffer_copy(parser, &token_buffer->base);
+    pm_buffer_cleanup(&token_buffer->regexp_buffer);
 }
 
 /**
@@ -10492,10 +9833,11 @@ static void
 pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
     if (token_buffer->base.cursor == NULL) {
         pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
-        parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
     } else {
-        pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
-        pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
+        const uint8_t *cursor = token_buffer->base.cursor;
+        size_t length = (size_t) (parser->current.end - cursor);
+        pm_buffer_append_bytes(&token_buffer->base.buffer, cursor, length);
+        pm_buffer_append_bytes(&token_buffer->regexp_buffer, cursor, length);
         pm_regexp_token_buffer_copy(parser, token_buffer);
     }
 }
@@ -10514,7 +9856,7 @@ static void
 pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
     const uint8_t *start;
     if (token_buffer->cursor == NULL) {
-        pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
+        pm_buffer_init(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
         start = parser->current.start;
     } else {
         start = token_buffer->cursor;
@@ -10531,8 +9873,8 @@ static void
 pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
     const uint8_t *start;
     if (token_buffer->base.cursor == NULL) {
-        pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
-        pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
+        pm_buffer_init(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
+        pm_buffer_init(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
         start = parser->current.start;
     } else {
         start = token_buffer->base.cursor;
@@ -10551,7 +9893,7 @@ pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *tok
  * Effectively the same thing as pm_strspn_inline_whitespace, but in the case of
  * a tilde heredoc expands out tab characters to the nearest tab boundaries.
  */
-static inline size_t
+static PRISM_INLINE size_t
 pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
     size_t whitespace = 0;
 
@@ -10599,7 +9941,7 @@ pm_lex_percent_delimiter(pm_parser_t *parser) {
             parser_flush_heredoc_end(parser);
         } else {
             // Otherwise, we'll add the newline to the list of newlines.
-            pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
+            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + U32(eol_length));
         }
 
         uint8_t delimiter = *parser->current.end;
@@ -10647,6 +9989,12 @@ parser_lex(pm_parser_t *parser) {
     unsigned int semantic_token_seen = parser->semantic_token_seen;
     parser->semantic_token_seen = true;
 
+    // We'll jump to this label when we are about to encounter an EOF.
+    // If we still have lex_modes on the stack, we pop them so that cleanup
+    // can happen. For example, we should still continue parsing after a heredoc
+    // identifier, even if the heredoc body was syntactically invalid.
+    switch_lex_modes:
+
     switch (parser->lex_modes.current->mode) {
         case PM_LEX_DEFAULT:
         case PM_LEX_EMBEXPR:
@@ -10669,22 +10017,29 @@ parser_lex(pm_parser_t *parser) {
             bool space_seen = false;
 
             // First, we're going to skip past any whitespace at the front of the next
-            // token.
+            // token. Skip runs of inline whitespace in bulk to avoid per-character
+            // stores back to parser->current.end.
             bool chomping = true;
             while (parser->current.end < parser->end && chomping) {
-                switch (*parser->current.end) {
-                    case ' ':
-                    case '\t':
-                    case '\f':
-                    case '\v':
-                        parser->current.end++;
+                {
+                    static const uint8_t inline_whitespace[256] = {
+                        [' '] = 1, ['\t'] = 1, ['\f'] = 1, ['\v'] = 1
+                    };
+                    const uint8_t *scan = parser->current.end;
+                    while (scan < parser->end && inline_whitespace[*scan]) scan++;
+                    if (scan > parser->current.end) {
+                        parser->current.end = scan;
                         space_seen = true;
-                        break;
+                        continue;
+                    }
+                }
+
+                switch (*parser->current.end) {
                     case '\r':
                         if (match_eol_offset(parser, 1)) {
                             chomping = false;
                         } else {
-                            pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
+                            pm_parser_warn(parser, PM_TOKEN_END(parser, &parser->current), 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
                             parser->current.end++;
                             space_seen = true;
                         }
@@ -10697,7 +10052,7 @@ parser_lex(pm_parser_t *parser) {
                                 parser->heredoc_end = NULL;
                             } else {
                                 parser->current.end += eol_length + 1;
-                                pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+                                pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
                                 space_seen = true;
                             }
                         } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
@@ -10720,6 +10075,14 @@ parser_lex(pm_parser_t *parser) {
             // We'll check if we're at the end of the file. If we are, then we
             // need to return the EOF token.
             if (parser->current.end >= parser->end) {
+                // We may be missing closing tokens. We should pop modes one by one
+                // to do the appropriate cleanup like moving next_start for heredocs.
+                // Only when no mode is remaining will we actually emit the EOF token.
+                if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) {
+                    lex_mode_pop(parser);
+                    goto switch_lex_modes;
+                }
+
                 // If we hit EOF, but the EOF came immediately after a newline,
                 // set the start of the token to the newline.  This way any EOF
                 // errors will be reported as happening on that line rather than
@@ -10791,7 +10154,7 @@ parser_lex(pm_parser_t *parser) {
                         }
 
                         if (parser->heredoc_end == NULL) {
-                            pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+                            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
                         }
                     }
 
@@ -10849,14 +10212,50 @@ parser_lex(pm_parser_t *parser) {
                                 following = next_newline(following, parser->end - following);
                             }
 
-                            // If the lex state was ignored, or we hit a '.' or a '&.',
-                            // we will lex the ignored newline
+                            // If the lex state was ignored, we will lex the
+                            // ignored newline.
+                            if (lex_state_ignored_p(parser)) {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lexed_comment = false;
+                                goto lex_next_token;
+                            }
+
+                            // If we hit a '.' or a '&.' we will lex the ignored
+                            // newline.
+                            if (following && (
+                                (peek_at(parser, following) == '.') ||
+                                (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
+                            )) {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lexed_comment = false;
+                                goto lex_next_token;
+                            }
+
+
+                            // If we are parsing as CRuby 4.0 or later and we
+                            // hit a '&&', a '||', an 'and', or an 'or' at
+                            // `following` then we will lex the ignored newline.
                             if (
-                                lex_state_ignored_p(parser) ||
-                                (following && (
-                                    (peek_at(parser, following) == '.') ||
-                                    (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
-                                ))
+                                (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) &&
+                                following && (
+                                    (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
+                                    (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
+                                    (
+                                        peek_at(parser, following) == 'a' &&
+                                        peek_at(parser, following + 1) == 'n' &&
+                                        peek_at(parser, following + 2) == 'd' &&
+                                        peek_at(parser, following + 3) != '!' &&
+                                        peek_at(parser, following + 3) != '?' &&
+                                        !char_is_identifier(parser, following + 3, parser->end - (following + 3))
+                                    ) ||
+                                    (
+                                        peek_at(parser, following) == 'o' &&
+                                        peek_at(parser, following + 1) == 'r' &&
+                                        peek_at(parser, following + 2) != '!' &&
+                                        peek_at(parser, following + 2) != '?' &&
+                                        !char_is_identifier(parser, following + 2, parser->end - (following + 2))
+                                    )
+                                )
                             ) {
                                 if (!lexed_comment) parser_lex_ignored_newline(parser);
                                 lexed_comment = false;
@@ -10896,6 +10295,67 @@ parser_lex(pm_parser_t *parser) {
                             parser->next_start = NULL;
                             LEX(PM_TOKEN_AMPERSAND_DOT);
                         }
+
+                        if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
+                            // If we hit an && then we are in a logical chain
+                            // and we need to return the logical operator.
+                            if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lex_state_set(parser, PM_LEX_STATE_BEG);
+                                parser->current.start = next_content;
+                                parser->current.end = next_content + 2;
+                                parser->next_start = NULL;
+                                LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
+                            }
+
+                            // If we hit a || then we are in a logical chain and
+                            // we need to return the logical operator.
+                            if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lex_state_set(parser, PM_LEX_STATE_BEG);
+                                parser->current.start = next_content;
+                                parser->current.end = next_content + 2;
+                                parser->next_start = NULL;
+                                LEX(PM_TOKEN_PIPE_PIPE);
+                            }
+
+                            // If we hit an 'and' then we are in a logical chain
+                            // and we need to return the logical operator.
+                            if (
+                                peek_at(parser, next_content) == 'a' &&
+                                peek_at(parser, next_content + 1) == 'n' &&
+                                peek_at(parser, next_content + 2) == 'd' &&
+                                peek_at(parser, next_content + 3) != '!' &&
+                                peek_at(parser, next_content + 3) != '?' &&
+                                !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
+                            ) {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lex_state_set(parser, PM_LEX_STATE_BEG);
+                                parser->current.start = next_content;
+                                parser->current.end = next_content + 3;
+                                parser->next_start = NULL;
+                                parser->command_start = true;
+                                LEX(PM_TOKEN_KEYWORD_AND);
+                            }
+
+                            // If we hit an 'or' then we are in a logical chain
+                            // and we need to return the logical operator.
+                            if (
+                                peek_at(parser, next_content) == 'o' &&
+                                peek_at(parser, next_content + 1) == 'r' &&
+                                peek_at(parser, next_content + 2) != '!' &&
+                                peek_at(parser, next_content + 2) != '?' &&
+                                !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
+                            ) {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lex_state_set(parser, PM_LEX_STATE_BEG);
+                                parser->current.start = next_content;
+                                parser->current.end = next_content + 2;
+                                parser->next_start = NULL;
+                                parser->command_start = true;
+                                LEX(PM_TOKEN_KEYWORD_OR);
+                            }
+                        }
                     }
 
                     // At this point we know this is a regular newline, and we can set the
@@ -10910,7 +10370,7 @@ parser_lex(pm_parser_t *parser) {
                 // ,
                 case ',':
                     if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
-                        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_str(parser->current.type));
                     }
 
                     lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
@@ -11036,7 +10496,7 @@ parser_lex(pm_parser_t *parser) {
                         } else if (lex_state_beg_p(parser)) {
                             type = PM_TOKEN_USTAR_STAR;
                         } else if (ambiguous_operator_p(parser, space_seen)) {
-                            PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
+                            PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
                         }
 
                         if (lex_state_operator_p(parser)) {
@@ -11061,7 +10521,7 @@ parser_lex(pm_parser_t *parser) {
                     } else if (lex_state_beg_p(parser)) {
                         type = PM_TOKEN_USTAR;
                     } else if (ambiguous_operator_p(parser, space_seen)) {
-                        PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
+                        PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
                     }
 
                     if (lex_state_operator_p(parser)) {
@@ -11187,7 +10647,7 @@ parser_lex(pm_parser_t *parser) {
                                 bool ident_error = false;
 
                                 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
-                                    pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
+                                    pm_parser_err(parser, U32(ident_start - parser->start), U32(ident_length), PM_ERR_HEREDOC_IDENTIFIER);
                                     ident_error = true;
                                 }
 
@@ -11220,7 +10680,7 @@ parser_lex(pm_parser_t *parser) {
                                     } else {
                                         // Otherwise, we want to indicate that the body of the
                                         // heredoc starts on the character after the next newline.
-                                        pm_newline_list_append(&parser->newline_list, body_start);
+                                        pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(body_start - parser->start + 1));
                                         body_start++;
                                     }
 
@@ -11239,7 +10699,7 @@ parser_lex(pm_parser_t *parser) {
                         }
 
                         if (ambiguous_operator_p(parser, space_seen)) {
-                            PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
+                            PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
                         }
 
                         if (lex_state_operator_p(parser)) {
@@ -11365,7 +10825,7 @@ parser_lex(pm_parser_t *parser) {
                     } else if (lex_state_beg_p(parser)) {
                         type = PM_TOKEN_UAMPERSAND;
                     } else if (ambiguous_operator_p(parser, space_seen)) {
-                        PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
+                        PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
                     }
 
                     if (lex_state_operator_p(parser)) {
@@ -11441,7 +10901,7 @@ parser_lex(pm_parser_t *parser) {
                     }
 
                     if (ambiguous_operator_p(parser, space_seen)) {
-                        PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
+                        PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
                     }
 
                     lex_state_set(parser, PM_LEX_STATE_BEG);
@@ -11482,7 +10942,7 @@ parser_lex(pm_parser_t *parser) {
                     }
 
                     if (ambiguous_operator_p(parser, space_seen)) {
-                        PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
+                        PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
                     }
 
                     lex_state_set(parser, PM_LEX_STATE_BEG);
@@ -11581,7 +11041,7 @@ parser_lex(pm_parser_t *parser) {
                     }
 
                     if (ambiguous_operator_p(parser, space_seen)) {
-                        PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
+                        PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
                     }
 
                     if (lex_state_operator_p(parser)) {
@@ -11766,7 +11226,7 @@ parser_lex(pm_parser_t *parser) {
                     }
 
                     if (ambiguous_operator_p(parser, space_seen)) {
-                        PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
+                        PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
                     }
 
                     lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
@@ -11802,40 +11262,40 @@ parser_lex(pm_parser_t *parser) {
                         // token after adding an appropriate error message.
                         if (!width) {
                             if (*parser->current.start >= 0x80) {
-                                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
+                                PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
                             } else if (*parser->current.start == '\\') {
                                 switch (peek_at(parser, parser->current.start + 1)) {
                                     case ' ':
                                         parser->current.end++;
-                                        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
+                                        PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
                                         break;
                                     case '\f':
                                         parser->current.end++;
-                                        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
+                                        PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
                                         break;
                                     case '\t':
                                         parser->current.end++;
-                                        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
+                                        PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
                                         break;
                                     case '\v':
                                         parser->current.end++;
-                                        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
+                                        PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
                                         break;
                                     case '\r':
                                         if (peek_at(parser, parser->current.start + 2) != '\n') {
                                             parser->current.end++;
-                                            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
+                                            PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
                                             break;
                                         }
                                         PRISM_FALLTHROUGH
                                     default:
-                                        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
+                                        PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
                                         break;
                                 }
                             } else if (char_is_ascii_printable(*parser->current.start)) {
-                                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
+                                PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
                             } else {
-                                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
+                                PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
                             }
 
                             goto lex_next_token;
@@ -11861,15 +11321,15 @@ parser_lex(pm_parser_t *parser) {
                         // correct column information for it.
                         const uint8_t *cursor = parser->current.end;
                         while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
-                            pm_newline_list_append(&parser->newline_list, cursor++);
+                            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(++cursor - parser->start));
                         }
 
                         parser->current.end = parser->end;
                         parser->current.type = PM_TOKEN___END__;
                         parser_lex_callback(parser);
 
-                        parser->data_loc.start = parser->current.start;
-                        parser->data_loc.end = parser->current.end;
+                        parser->data_loc.start = PM_TOKEN_START(parser, &parser->current);
+                        parser->data_loc.length = PM_TOKEN_LENGTH(&parser->current);
 
                         LEX(PM_TOKEN_EOF);
                     }
@@ -11894,7 +11354,7 @@ parser_lex(pm_parser_t *parser) {
                         !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
                         (type == PM_TOKEN_IDENTIFIER) &&
                         ((pm_parser_local_depth(parser, &parser->current) != -1) ||
-                         pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
+                         pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)))
                     ) {
                         lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
                     }
@@ -11922,7 +11382,7 @@ parser_lex(pm_parser_t *parser) {
                     whitespace += 1;
                 }
             } else {
-                whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
+                whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
             }
 
             if (whitespace > 0) {
@@ -12037,7 +11497,7 @@ parser_lex(pm_parser_t *parser) {
                                 LEX(PM_TOKEN_STRING_CONTENT);
                             } else {
                                 // ... else track the newline.
-                                pm_newline_list_append(&parser->newline_list, parser->current.end);
+                                pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
                             }
 
                             parser->current.end++;
@@ -12065,7 +11525,7 @@ parser_lex(pm_parser_t *parser) {
                 if (*breakpoint == '#') {
                     pm_token_type_t type = lex_interpolation(parser, breakpoint);
 
-                    if (type == PM_TOKEN_NOT_PROVIDED) {
+                    if (!type) {
                         // If we haven't returned at this point then we had something
                         // that looked like an interpolated class or instance variable
                         // like "#@" but wasn't actually. In this case we'll just skip
@@ -12170,7 +11630,13 @@ parser_lex(pm_parser_t *parser) {
                     size_t eol_length = match_eol_at(parser, breakpoint);
                     if (eol_length) {
                         parser->current.end = breakpoint + eol_length;
-                        pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+
+                        // Track the newline if we're not in a heredoc that
+                        // would have already added the newline to the
+                        // list.
+                        if (parser->heredoc_end == NULL) {
+                            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
+                        }
                     } else {
                         parser->current.end = breakpoint + 1;
                     }
@@ -12216,7 +11682,7 @@ parser_lex(pm_parser_t *parser) {
                         // If we've hit a newline, then we need to track that in
                         // the list of newlines.
                         if (parser->heredoc_end == NULL) {
-                            pm_newline_list_append(&parser->newline_list, breakpoint);
+                            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
                             parser->current.end = breakpoint + 1;
                             breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
                             break;
@@ -12264,7 +11730,7 @@ parser_lex(pm_parser_t *parser) {
                                     LEX(PM_TOKEN_STRING_CONTENT);
                                 } else {
                                     // ... else track the newline.
-                                    pm_newline_list_append(&parser->newline_list, parser->current.end);
+                                    pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
                                 }
 
                                 parser->current.end++;
@@ -12311,7 +11777,7 @@ parser_lex(pm_parser_t *parser) {
                         // interpolation.
                         pm_token_type_t type = lex_interpolation(parser, breakpoint);
 
-                        if (type == PM_TOKEN_NOT_PROVIDED) {
+                        if (!type) {
                             // If we haven't returned at this point then we had
                             // something that looked like an interpolated class or
                             // instance variable like "#@" but wasn't actually. In
@@ -12424,7 +11890,13 @@ parser_lex(pm_parser_t *parser) {
                     size_t eol_length = match_eol_at(parser, breakpoint);
                     if (eol_length) {
                         parser->current.end = breakpoint + eol_length;
-                        pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+
+                        // Track the newline if we're not in a heredoc that
+                        // would have already added the newline to the
+                        // list.
+                        if (parser->heredoc_end == NULL) {
+                            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
+                        }
                     } else {
                         parser->current.end = breakpoint + 1;
                     }
@@ -12436,6 +11908,13 @@ parser_lex(pm_parser_t *parser) {
                         LEX(PM_TOKEN_LABEL_END);
                     }
 
+                    // When the delimiter itself is a newline, we won't
+                    // get a chance to flush heredocs in the usual places since
+                    // the newline is already consumed.
+                    if (term == '\n' && parser->heredoc_end) {
+                        parser_flush_heredoc_end(parser);
+                    }
+
                     lex_state_set(parser, PM_LEX_STATE_END);
                     lex_mode_pop(parser);
                     LEX(PM_TOKEN_STRING_END);
@@ -12468,7 +11947,7 @@ parser_lex(pm_parser_t *parser) {
                         // for the terminator in case the terminator is a
                         // newline character.
                         if (parser->heredoc_end == NULL) {
-                            pm_newline_list_append(&parser->newline_list, breakpoint);
+                            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
                             parser->current.end = breakpoint + 1;
                             breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
                             break;
@@ -12522,7 +12001,7 @@ parser_lex(pm_parser_t *parser) {
                                     LEX(PM_TOKEN_STRING_CONTENT);
                                 } else {
                                     // ... else track the newline.
-                                    pm_newline_list_append(&parser->newline_list, parser->current.end);
+                                    pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
                                 }
 
                                 parser->current.end++;
@@ -12551,7 +12030,7 @@ parser_lex(pm_parser_t *parser) {
                     case '#': {
                         pm_token_type_t type = lex_interpolation(parser, breakpoint);
 
-                        if (type == PM_TOKEN_NOT_PROVIDED) {
+                        if (!type) {
                             // If we haven't returned at this point then we had something that
                             // looked like an interpolated class or instance variable like "#@"
                             // but wasn't actually. In this case we'll just skip to the next
@@ -12651,7 +12130,7 @@ parser_lex(pm_parser_t *parser) {
                         (memcmp(terminator_start, ident_start, ident_length) == 0)
                     ) {
                         if (newline != NULL) {
-                            pm_newline_list_append(&parser->newline_list, newline);
+                            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
                         }
 
                         parser->current.end = terminator_end;
@@ -12682,7 +12161,7 @@ parser_lex(pm_parser_t *parser) {
             // Otherwise we'll be parsing string content. These are the places
             // where we need to split up the content of the heredoc. We'll use
             // strpbrk to find the first of these characters.
-            uint8_t breakpoints[] = "\r\n\\#";
+            uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE] = "\r\n\\#";
 
             pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
             if (quote == PM_HEREDOC_QUOTE_SINGLE) {
@@ -12723,7 +12202,7 @@ parser_lex(pm_parser_t *parser) {
                             LEX(PM_TOKEN_STRING_CONTENT);
                         }
 
-                        pm_newline_list_append(&parser->newline_list, breakpoint);
+                        pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
 
                         // If we have a - or ~ heredoc, then we can match after
                         // some leading whitespace.
@@ -12841,7 +12320,10 @@ parser_lex(pm_parser_t *parser) {
                                     // string content.
                                     if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
                                         const uint8_t *end = parser->current.end;
-                                        pm_newline_list_append(&parser->newline_list, end);
+
+                                        if (parser->heredoc_end == NULL) {
+                                            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(end - parser->start + 1));
+                                        }
 
                                         // Here we want the buffer to only
                                         // include up to the backslash.
@@ -12872,7 +12354,7 @@ parser_lex(pm_parser_t *parser) {
                     case '#': {
                         pm_token_type_t type = lex_interpolation(parser, breakpoint);
 
-                        if (type == PM_TOKEN_NOT_PROVIDED) {
+                        if (!type) {
                             // If we haven't returned at this point then we had
                             // something that looked like an interpolated class
                             // or instance variable like "#@" but wasn't
@@ -13097,7 +12579,7 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
 /**
  * Returns true if the current token is of the given type.
  */
-static inline bool
+static PRISM_INLINE bool
 match1(const pm_parser_t *parser, pm_token_type_t type) {
     return parser->current.type == type;
 }
@@ -13105,7 +12587,7 @@ match1(const pm_parser_t *parser, pm_token_type_t type) {
 /**
  * Returns true if the current token is of either of the given types.
  */
-static inline bool
+static PRISM_INLINE bool
 match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
     return match1(parser, type1) || match1(parser, type2);
 }
@@ -13113,7 +12595,7 @@ match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2)
 /**
  * Returns true if the current token is any of the three given types.
  */
-static inline bool
+static PRISM_INLINE bool
 match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
     return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
 }
@@ -13121,15 +12603,23 @@ match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
 /**
  * Returns true if the current token is any of the four given types.
  */
-static inline bool
+static PRISM_INLINE bool
 match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
     return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
 }
 
 /**
+ * Returns true if the current token is any of the six given types.
+ */
+static PRISM_INLINE bool
+match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
+    return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
+}
+
+/**
  * Returns true if the current token is any of the seven given types.
  */
-static inline bool
+static PRISM_INLINE bool
 match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
     return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
 }
@@ -13137,20 +12627,12 @@ match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
 /**
  * Returns true if the current token is any of the eight given types.
  */
-static inline bool
+static PRISM_INLINE bool
 match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
     return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
 }
 
 /**
- * Returns true if the current token is any of the nine given types.
- */
-static inline bool
-match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) {
-    return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9);
-}
-
-/**
  * If the current token is of the specified type, lex forward by one token and
  * return true. Otherwise, return false. For example:
  *
@@ -13169,7 +12651,7 @@ accept1(pm_parser_t *parser, pm_token_type_t type) {
  * If the current token is either of the two given types, lex forward by one
  * token and return true. Otherwise return false.
  */
-static inline bool
+static PRISM_INLINE bool
 accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
     if (match2(parser, type1, type2)) {
         parser_lex(parser);
@@ -13194,10 +12676,10 @@ expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
     if (accept1(parser, type)) return;
 
     const uint8_t *location = parser->previous.end;
-    pm_parser_err(parser, location, location, diag_id);
+    pm_parser_err(parser, U32(location - parser->start), 0, diag_id);
 
     parser->previous.start = location;
-    parser->previous.type = PM_TOKEN_MISSING;
+    parser->previous.type = 0;
 }
 
 /**
@@ -13209,10 +12691,10 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di
     if (accept2(parser, type1, type2)) return;
 
     const uint8_t *location = parser->previous.end;
-    pm_parser_err(parser, location, location, diag_id);
+    pm_parser_err(parser, U32(location - parser->start), 0, diag_id);
 
     parser->previous.start = location;
-    parser->previous.type = PM_TOKEN_MISSING;
+    parser->previous.type = 0;
 }
 
 /**
@@ -13226,20 +12708,43 @@ expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ide
     } else {
         pm_parser_err_heredoc_term(parser, ident_start, ident_length);
         parser->previous.start = parser->previous.end;
-        parser->previous.type = PM_TOKEN_MISSING;
+        parser->previous.type = 0;
     }
 }
 
+/**
+ * A variant of expect1 for closing tokens. If the current token is not of the
+ * given type, the error is attached to the span of the opening token (e.g.,
+ * `def`, `class`, `if`, `{`) rather than to the current position, so that it
+ * points at the line that opened the construct instead of the end of the file.
+ */
+static void
+expect1_opening(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id, const pm_token_t *opening) {
+    if (accept1(parser, type)) return; /* matched: accept1 has lexed past it */
+
+    const uint8_t *start = opening->start;
+    pm_parser_err(parser, U32(start - parser->start), U32(opening->end - start), diag_id); /* opening's offset and length */
+
+    parser->previous.start = parser->previous.end; /* make previous zero-width */
+    parser->previous.type = 0; /* same type value expect1 assigns on failure */
+}
+
+/** Single-bit flags for parse_expression's uint8_t flags argument that control parsing behavior. */
+#define PM_PARSE_ACCEPTS_COMMAND_CALL ((uint8_t) 0x1)
+#define PM_PARSE_ACCEPTS_LABEL        ((uint8_t) 0x2)
+#define PM_PARSE_ACCEPTS_DO_BLOCK     ((uint8_t) 0x4)
+#define PM_PARSE_IN_ENDLESS_DEF       ((uint8_t) 0x8)
+
 static pm_node_t *
-parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
+parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
 
 /**
  * This is a wrapper of parse_expression, which also checks whether the
  * resulting node is a value expression.
  */
 static pm_node_t *
-parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
-    pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
+parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
+    pm_node_t *node = parse_expression(parser, binding_power, flags, diag_id, depth);
     pm_assert_value_expression(parser, node);
     return node;
 }
@@ -13262,7 +12767,7 @@ parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bo
  * work in all cases, it may need to be refactored later. But it appears to work
  * for now.
  */
-static inline bool
+static PRISM_INLINE bool
 token_begins_expression_p(pm_token_type_t type) {
     switch (type) {
         case PM_TOKEN_EQUAL_GREATER:
@@ -13278,6 +12783,7 @@ token_begins_expression_p(pm_token_type_t type) {
         case PM_TOKEN_EOF:
         case PM_TOKEN_LAMBDA_BEGIN:
         case PM_TOKEN_KEYWORD_DO:
+        case PM_TOKEN_KEYWORD_DO_BLOCK:
         case PM_TOKEN_KEYWORD_DO_LOOP:
         case PM_TOKEN_KEYWORD_END:
         case PM_TOKEN_KEYWORD_ELSE:
@@ -13323,14 +12829,89 @@ token_begins_expression_p(pm_token_type_t type) {
  * prefixed by the * operator.
  */
 static pm_node_t *
-parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
+parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
     if (accept1(parser, PM_TOKEN_USTAR)) {
         pm_token_t operator = parser->previous;
-        pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
-        return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
+        pm_node_t *expression = parse_value_expression(parser, binding_power, (uint8_t) (flags & PM_PARSE_ACCEPTS_DO_BLOCK), PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+        return UP(pm_splat_node_create(parser, &operator, expression));
     }
 
-    return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
+    return parse_value_expression(parser, binding_power, flags, diag_id, depth);
+}
+
+static bool
+pm_node_unreference_each(const pm_node_t *node, void *data) {
+    switch (PM_NODE_TYPE(node)) {
+        /* Visitor callback for pm_node_unreference; data is the pm_parser_t.
+         * Block exits (break/next/redo) may be recorded in the parser's
+         * current_block_exits list, so if this node has an entry there, remove
+         * it by shifting the later entries down and shrinking the list. */
+        case PM_BREAK_NODE:
+        case PM_NEXT_NODE:
+        case PM_REDO_NODE: {
+            pm_parser_t *parser = (pm_parser_t *) data;
+            size_t index = 0;
+
+            while (index < parser->current_block_exits->size) {
+                pm_node_t *block_exit = parser->current_block_exits->nodes[index];
+
+                if (block_exit == node) {
+                    if (index + 1 < parser->current_block_exits->size) {
+                        memmove(
+                            &parser->current_block_exits->nodes[index],
+                            &parser->current_block_exits->nodes[index + 1],
+                            (parser->current_block_exits->size - index - 1) * sizeof(pm_node_t *)
+                        );
+                    }
+                    parser->current_block_exits->size--;
+
+                    /* Found and removed. Return true so the walk continues into
+                     * this node's arguments, which could themselves be block exits. */
+                    return true;
+                }
+
+                index++;
+            }
+
+            return true; /* not in the list; still visit the node's children */
+        }
+        /* When an implicit local variable is written to or targeted, it becomes
+         * a regular, named local variable. This branch removes such a node (or
+         * one that is being dropped) from the scope's implicit parameters. */
+        case PM_LOCAL_VARIABLE_READ_NODE:
+        case PM_IT_LOCAL_VARIABLE_READ_NODE: {
+            pm_parser_t *parser = (pm_parser_t *) data;
+            pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
+
+            for (size_t index = 0; index < implicit_parameters->size; index++) {
+                if (implicit_parameters->nodes[index] == node) {
+                    /* If the node is not the last one in the list, we need to
+                     * shift the remaining nodes down to fill the gap. This is
+                     * extremely unlikely to happen. */
+                    if (index != implicit_parameters->size - 1) {
+                        memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
+                    }
+
+                    implicit_parameters->size--;
+                    break;
+                }
+            }
+
+            return false; /* do not descend below this node */
+        }
+        default:
+            return true;
+    }
+}
+
+/**
+ * Walk the subtree rooted at the given node and remove each visited node from
+ * the parser lists that may reference it (the current block exits and the
+ * current scope's implicit parameters), so that no stale entries remain.
+ */
+static void
+pm_node_unreference(pm_parser_t *parser, const pm_node_t *node) {
+    pm_visit_node(node, pm_node_unreference_each, parser);
 }
 
 /**
@@ -13345,16 +12926,12 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
     // append an =.
     pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
     size_t length = constant->length;
-    uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
-    if (name == NULL) return;
+    uint8_t *name = (uint8_t *) pm_arena_alloc(parser->arena, length + 1, 1);
 
     memcpy(name, constant->start, length);
     name[length] = '=';
 
-    // Now switch the name to the new string.
-    // This silences clang analyzer warning about leak of memory pointed by `name`.
-    // NOLINTNEXTLINE(clang-analyzer-*)
-    *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
+    *name_field = pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, name, length + 1);
 }
 
 /**
@@ -13376,35 +12953,10 @@ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
         default: break;
     }
 
-    pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
+    pm_constant_id_t name = pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
     pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
 
-    pm_node_destroy(parser, target);
-    return (pm_node_t *) result;
-}
-
-/**
- * When an implicit local variable is written to or targeted, it becomes a
- * regular, named local variable. This function removes it from the list of
- * implicit parameters when that happens.
- */
-static void
-parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
-    pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
-
-    for (size_t index = 0; index < implicit_parameters->size; index++) {
-        if (implicit_parameters->nodes[index] == node) {
-            // If the node is not the last one in the list, we need to shift the
-            // remaining nodes down to fill the gap. This is extremely unlikely
-            // to happen.
-            if (index != implicit_parameters->size - 1) {
-                memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
-            }
-
-            implicit_parameters->size--;
-            break;
-        }
-    }
+    return UP(result);
 }
 
 /**
@@ -13418,7 +12970,7 @@ parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
 static pm_node_t *
 parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
     switch (PM_NODE_TYPE(target)) {
-        case PM_MISSING_NODE:
+        case PM_ERROR_RECOVERY_NODE:
             return target;
         case PM_SOURCE_ENCODING_NODE:
         case PM_FALSE_NODE:
@@ -13456,15 +13008,15 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
         case PM_BACK_REFERENCE_READ_NODE:
         case PM_NUMBERED_REFERENCE_READ_NODE:
             PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
-            return target;
+            return UP(pm_error_recovery_node_create_unexpected(parser, target));
         case PM_GLOBAL_VARIABLE_READ_NODE:
             assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t));
             target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
             return target;
         case PM_LOCAL_VARIABLE_READ_NODE: {
-            if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
-                PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
-                parse_target_implicit_parameter(parser, target);
+            if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
+                PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(target));
+                pm_node_unreference(parser, target);
             }
 
             const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
@@ -13479,10 +13031,9 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
         }
         case PM_IT_LOCAL_VARIABLE_READ_NODE: {
             pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
-            pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
+            pm_node_t *node = UP(pm_local_variable_target_node_create(parser, &target->location, name, 0));
 
-            parse_target_implicit_parameter(parser, target);
-            pm_node_destroy(parser, target);
+            pm_node_unreference(parser, target);
 
             return node;
         }
@@ -13505,7 +13056,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
                 splat->expression = parse_target(parser, splat->expression, multiple, true);
             }
 
-            return (pm_node_t *) splat;
+            return UP(splat);
         }
         case PM_CALL_NODE: {
             pm_call_node_t *call = (pm_call_node_t *) target;
@@ -13514,10 +13065,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
             // target then this is either a method call or a local variable
             // write.
             if (
-                (call->message_loc.start != NULL) &&
-                (call->message_loc.end[-1] != '!') &&
-                (call->message_loc.end[-1] != '?') &&
-                (call->opening_loc.start == NULL) &&
+                (call->message_loc.length > 0) &&
+                (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') &&
+                (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') &&
+                (call->opening_loc.length == 0) &&
                 (call->arguments == NULL) &&
                 (call->block == NULL)
             ) {
@@ -13531,21 +13082,19 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
                     // When it was parsed in the prefix position, foo was seen as a
                     // method call with no receiver and no arguments. Now we have an
                     // =, so we know it's a local variable write.
-                    const pm_location_t message_loc = call->message_loc;
-
-                    pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
-                    pm_node_destroy(parser, target);
+                    pm_location_t message_loc = call->message_loc;
+                    pm_constant_id_t name = pm_parser_local_add_location(parser, &message_loc, 0);
 
-                    return (pm_node_t *) pm_local_variable_target_node_create(parser, &message_loc, name, 0);
+                    return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0));
                 }
 
-                if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
+                if (peek_at(parser, parser->start + call->message_loc.start) == '_' || parser->encoding->alnum_char(parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) {
                     if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
                         pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
                     }
 
                     parse_write_name(parser, &call->name);
-                    return (pm_node_t *) pm_call_target_node_create(parser, call);
+                    return UP(pm_call_target_node_create(parser, call));
                 }
             }
 
@@ -13553,7 +13102,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
             // an aref expression, and we can transform it into an aset
             // expression.
             if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
-                return (pm_node_t *) pm_index_target_node_create(parser, call);
+                return UP(pm_index_target_node_create(parser, call));
             }
         }
         PRISM_FALLTHROUGH
@@ -13596,7 +13145,7 @@ parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
     pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
 
     if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
-        return (pm_node_t *) pm_shareable_constant_node_create(parser, write, shareable_constant);
+        return UP(pm_shareable_constant_node_create(parser, write, shareable_constant));
     }
 
     return write;
@@ -13608,16 +13157,14 @@ parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
 static pm_node_t *
 parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
     switch (PM_NODE_TYPE(target)) {
-        case PM_MISSING_NODE:
-            pm_node_destroy(parser, value);
+        case PM_ERROR_RECOVERY_NODE:
             return target;
         case PM_CLASS_VARIABLE_READ_NODE: {
             pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
-            pm_node_destroy(parser, target);
-            return (pm_node_t *) node;
+            return UP(node);
         }
         case PM_CONSTANT_PATH_NODE: {
-            pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
+            pm_node_t *node = UP(pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value));
 
             if (context_def_p(parser)) {
                 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
@@ -13626,13 +13173,12 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
             return parse_shareable_constant_write(parser, node);
         }
         case PM_CONSTANT_READ_NODE: {
-            pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
+            pm_node_t *node = UP(pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value));
 
             if (context_def_p(parser)) {
                 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
             }
 
-            pm_node_destroy(parser, target);
             return parse_shareable_constant_write(parser, node);
         }
         case PM_BACK_REFERENCE_READ_NODE:
@@ -13641,45 +13187,40 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
             PRISM_FALLTHROUGH
         case PM_GLOBAL_VARIABLE_READ_NODE: {
             pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
-            pm_node_destroy(parser, target);
-            return (pm_node_t *) node;
+            return UP(node);
         }
         case PM_LOCAL_VARIABLE_READ_NODE: {
             pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
 
+            pm_location_t location = target->location;
             pm_constant_id_t name = local_read->name;
-            pm_location_t name_loc = target->location;
-
             uint32_t depth = local_read->depth;
             pm_scope_t *scope = pm_parser_scope_find(parser, depth);
 
-            if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
+            if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
                 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
-                PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
-                parse_target_implicit_parameter(parser, target);
+                PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), diag_id, parser->start + PM_NODE_START(target));
+                pm_node_unreference(parser, target);
             }
 
             pm_locals_unread(&scope->locals, name);
-            pm_node_destroy(parser, target);
 
-            return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
+            return UP(pm_local_variable_write_node_create(parser, name, depth, value, &location, operator));
         }
         case PM_IT_LOCAL_VARIABLE_READ_NODE: {
             pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
-            pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
+            pm_node_t *node = UP(pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator));
 
-            parse_target_implicit_parameter(parser, target);
-            pm_node_destroy(parser, target);
+            pm_node_unreference(parser, target);
 
             return node;
         }
         case PM_INSTANCE_VARIABLE_READ_NODE: {
-            pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
-            pm_node_destroy(parser, target);
+            pm_node_t *write_node = UP(pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value));
             return write_node;
         }
         case PM_MULTI_TARGET_NODE:
-            return (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value);
+            return UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value));
         case PM_SPLAT_NODE: {
             pm_splat_node_t *splat = (pm_splat_node_t *) target;
 
@@ -13688,9 +13229,9 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
             }
 
             pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
-            pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
+            pm_multi_target_node_targets_append(parser, multi_target, UP(splat));
 
-            return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
+            return UP(pm_multi_write_node_create(parser, multi_target, operator, value));
         }
         case PM_CALL_NODE: {
             pm_call_node_t *call = (pm_call_node_t *) target;
@@ -13699,10 +13240,10 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
             // target then this is either a method call or a local variable
             // write.
             if (
-                (call->message_loc.start != NULL) &&
-                (call->message_loc.end[-1] != '!') &&
-                (call->message_loc.end[-1] != '?') &&
-                (call->opening_loc.start == NULL) &&
+                (call->message_loc.length > 0) &&
+                (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') &&
+                (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') &&
+                (call->opening_loc.length == 0) &&
                 (call->arguments == NULL) &&
                 (call->block == NULL)
             ) {
@@ -13716,19 +13257,18 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
                     // When it was parsed in the prefix position, foo was seen as a
                     // method call with no receiver and no arguments. Now we have an
                     // =, so we know it's a local variable write.
-                    const pm_location_t message = call->message_loc;
+                    pm_location_t message_loc = call->message_loc;
 
-                    pm_parser_local_add_location(parser, message.start, message.end, 0);
-                    pm_node_destroy(parser, target);
+                    pm_refute_numbered_parameter(parser, message_loc.start, message_loc.length);
+                    pm_parser_local_add_location(parser, &message_loc, 0);
 
-                    pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
-                    target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
+                    pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, parser->start + PM_LOCATION_START(&message_loc), parser->start + PM_LOCATION_END(&message_loc));
+                    target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message_loc, operator));
 
-                    pm_refute_numbered_parameter(parser, message.start, message.end);
                     return target;
                 }
 
-                if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
+                if (char_is_identifier_start(parser, parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) {
                     // When we get here, we have a method call, because it was
                     // previously marked as a method call but now we have an =. This
                     // looks like:
@@ -13742,13 +13282,14 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
                     pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
                     call->arguments = arguments;
 
-                    pm_arguments_node_arguments_append(arguments, value);
-                    call->base.location.end = arguments->base.location.end;
+                    pm_arguments_node_arguments_append(parser->arena, arguments, value);
+                    PM_NODE_LENGTH_SET_NODE(call, arguments);
+                    call->equal_loc = TOK2LOC(parser, operator);
 
                     parse_write_name(parser, &call->name);
-                    pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
+                    pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
 
-                    return (pm_node_t *) call;
+                    return UP(call);
                 }
             }
 
@@ -13760,25 +13301,31 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
                     call->arguments = pm_arguments_node_create(parser);
                 }
 
-                pm_arguments_node_arguments_append(call->arguments, value);
-                target->location.end = value->location.end;
+                pm_arguments_node_arguments_append(parser->arena, call->arguments, value);
+                PM_NODE_LENGTH_SET_NODE(target, value);
 
                 // Replace the name with "[]=".
                 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
+                call->equal_loc = TOK2LOC(parser, operator);
 
                 // Ensure that the arguments for []= don't contain keywords
                 pm_index_arguments_check(parser, call->arguments, call->block);
-                pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
+                pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
 
                 return target;
             }
 
-            // If there are arguments on the call node, then it can't be a method
-            // call ending with = or a local variable write, so it must be a
-            // syntax error. In this case we'll fall through to our default
+            // If there are arguments on the call node, then it can't be a
+            // method call ending with = or a local variable write, so it must
+            // be a syntax error. In this case we'll fall through to our default
             // handling. We need to free the value that we parsed because there
             // is no way for us to attach it to the tree at this point.
-            pm_node_destroy(parser, value);
+            //
+            // Since it is possible for the value to contain an implicit
+            // parameter or a block exit (break/next/redo) somewhere in its
+            // subtree, we need to walk it and remove any such nodes from the
+            // parser's implicit parameter and block exit lists.
+            pm_node_unreference(parser, value);
         }
         PRISM_FALLTHROUGH
         default:
@@ -13809,11 +13356,10 @@ parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t
         default: break;
     }
 
-    pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
+    pm_constant_id_t name = pm_parser_local_add_location(parser, &target->location, 1);
     pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
 
-    pm_node_destroy(parser, target);
-    return (pm_node_t *) result;
+    return UP(result);
 }
 
 /**
@@ -13846,35 +13392,35 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
             pm_node_t *name = NULL;
 
             if (token_begins_expression_p(parser->current.type)) {
-                name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+                name = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
                 name = parse_target(parser, name, true, true);
             }
 
-            pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
+            pm_node_t *splat = UP(pm_splat_node_create(parser, &star_operator, name));
             pm_multi_target_node_targets_append(parser, result, splat);
             has_rest = true;
         } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
             context_push(parser, PM_CONTEXT_MULTI_TARGET);
-            pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
+            pm_node_t *target = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
             target = parse_target(parser, target, true, false);
 
             pm_multi_target_node_targets_append(parser, result, target);
             context_pop(parser);
         } else if (token_begins_expression_p(parser->current.type)) {
-            pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
+            pm_node_t *target = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
             target = parse_target(parser, target, true, false);
 
             pm_multi_target_node_targets_append(parser, result, target);
         } else if (!match1(parser, PM_TOKEN_EOF)) {
             // If we get here, then we have a trailing , in a multi target node.
             // We'll add an implicit rest node to represent this.
-            pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
+            pm_node_t *rest = UP(pm_implicit_rest_node_create(parser, &parser->previous));
             pm_multi_target_node_targets_append(parser, result, rest);
             break;
         }
     }
 
-    return (pm_node_t *) result;
+    return UP(result);
 }
 
 /**
@@ -13884,7 +13430,13 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
 static pm_node_t *
 parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
     pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
-    accept1(parser, PM_TOKEN_NEWLINE);
+
+    // If we're inside parentheses, then we allow a newline before the
+    // closing parenthesis or equals sign. Outside of parentheses, a newline
+    // is not allowed (e.g., `a, b\n= 1, 2` is not valid).
+    if (context_p(parser, PM_CONTEXT_PARENS) || context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
+        accept1(parser, PM_TOKEN_NEWLINE);
+    }
 
     // Ensure that we have either an = or a ) after the targets.
     if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
@@ -13913,7 +13465,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
     context_push(parser, context);
 
     while (true) {
-        pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
+        pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
         pm_statements_node_body_append(parser, statements, node, true);
 
         // If we're recovering from a syntax error, then we need to stop parsing
@@ -13953,7 +13505,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
         // we were unable to parse an expression, then we will skip past this
         // token and continue parsing the statements list. Otherwise we'll add
         // an error and continue parsing the statements list.
-        if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
+        if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) {
             parser_lex(parser);
 
             // If we are at the end of the file, then we need to stop parsing
@@ -13971,13 +13523,14 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
             // This is an inlined version of accept1 because the error that we
             // want to add has varargs. If this happens again, we should
             // probably extract a helper function.
-            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
             parser->previous.start = parser->previous.end;
-            parser->previous.type = PM_TOKEN_MISSING;
+            parser->previous.type = 0;
         }
     }
 
     context_pop(parser);
+
     bool last_value = true;
     switch (context) {
         case PM_CONTEXT_BEGIN_ENSURE:
@@ -13998,23 +13551,24 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
  */
 static void
 pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
-    const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
+    const pm_node_t *duplicated = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, true);
 
     if (duplicated != NULL) {
         pm_buffer_t buffer = { 0 };
-        pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
+        pm_static_literal_inspect(&buffer, &parser->line_offsets, parser->start, parser->start_line, parser->encoding->name, duplicated);
 
         pm_diagnostic_list_append_format(
+            &parser->metadata_arena,
             &parser->warning_list,
             duplicated->location.start,
-            duplicated->location.end,
+            duplicated->location.length,
             PM_WARN_DUPLICATED_HASH_KEY,
             (int) pm_buffer_length(&buffer),
             pm_buffer_value(&buffer),
-            pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
+            pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line
         );
 
-        pm_buffer_free(&buffer);
+        pm_buffer_cleanup(&buffer);
     }
 }
 
@@ -14026,14 +13580,15 @@ static void
 pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
     pm_node_t *previous;
 
-    if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
+    if ((previous = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, false)) != NULL) {
         pm_diagnostic_list_append_format(
+            &parser->metadata_arena,
             &parser->warning_list,
-            node->location.start,
-            node->location.end,
+            PM_NODE_START(node),
+            PM_NODE_LENGTH(node),
             PM_WARN_DUPLICATED_WHEN_CLAUSE,
-            pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
-            pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
+            pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line,
+            pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(previous), parser->start_line).line
         );
     }
 }
@@ -14061,14 +13616,14 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
                     // inner hash to share the static literals with the outer
                     // hash.
                     parser->current_hash_keys = literals;
-                    value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
+                    value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
                 } else if (token_begins_expression_p(parser->current.type)) {
-                    value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
+                    value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
                 } else {
                     pm_parser_scope_forwarding_keywords_check(parser, &operator);
                 }
 
-                element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
+                element = UP(pm_assoc_splat_node_create(parser, value, &operator));
                 contains_keyword_splat = true;
                 break;
             }
@@ -14076,44 +13631,43 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
                 pm_token_t label = parser->current;
                 parser_lex(parser);
 
-                pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
+                pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label));
                 pm_hash_key_static_literals_add(parser, literals, key);
 
-                pm_token_t operator = not_provided(parser);
                 pm_node_t *value = NULL;
 
                 if (token_begins_expression_p(parser->current.type)) {
-                    value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
+                    value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
                 } else {
                     if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
                         pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
-                        value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
+                        value = UP(pm_constant_read_node_create(parser, &constant));
                     } else {
                         int depth = -1;
                         pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
 
                         if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
-                            PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
+                            PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
                         } else {
                             depth = pm_parser_local_depth(parser, &identifier);
                         }
 
                         if (depth == -1) {
-                            value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
+                            value = UP(pm_call_node_variable_call_create(parser, &identifier));
                         } else {
-                            value = (pm_node_t *) pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth);
+                            value = UP(pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth));
                         }
                     }
 
-                    value->location.end++;
-                    value = (pm_node_t *) pm_implicit_node_create(parser, value);
+                    value->location.length++;
+                    value = UP(pm_implicit_node_create(parser, value));
                 }
 
-                element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
+                element = UP(pm_assoc_node_create(parser, key, NULL, value));
                 break;
             }
             default: {
-                pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
+                pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_LABEL, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
 
                 // Hash keys that are strings are automatically frozen. We will
                 // mark that here.
@@ -14123,24 +13677,22 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
 
                 pm_hash_key_static_literals_add(parser, literals, key);
 
-                pm_token_t operator;
-                if (pm_symbol_node_label_p(key)) {
-                    operator = not_provided(parser);
-                } else {
+                pm_token_t operator = { 0 };
+                if (!pm_symbol_node_label_p(parser, key)) {
                     expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
                     operator = parser->previous;
                 }
 
-                pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
-                element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
+                pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
+                element = UP(pm_assoc_node_create(parser, key, NTOK2PTR(operator), value));
                 break;
             }
         }
 
         if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
-            pm_hash_node_elements_append((pm_hash_node_t *) node, element);
+            pm_hash_node_elements_append(parser->arena, (pm_hash_node_t *) node, element);
         } else {
-            pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
+            pm_keyword_hash_node_elements_append(parser->arena, (pm_keyword_hash_node_t *) node, element);
         }
 
         // If there's no comma after the element, then we're done.
@@ -14161,23 +13713,47 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
     return contains_keyword_splat;
 }
 
+static PRISM_INLINE bool
+argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) {
+    if (pm_symbol_node_label_p(parser, argument)) {
+        return true;
+    }
+
+    switch (PM_NODE_TYPE(argument)) {
+        case PM_CALL_NODE: {
+            pm_call_node_t *cast = (pm_call_node_t *) argument;
+            if (cast->opening_loc.length == 0 && cast->arguments != NULL) {
+                if (PM_NODE_FLAG_P(cast->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS | PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
+                    return false;
+                }
+                if (cast->block != NULL) {
+                    return false;
+                }
+            }
+            break;
+        }
+        default: break;
+    }
+    return accept1(parser, PM_TOKEN_EQUAL_GREATER);
+}
+
 /**
  * Append an argument to a list of arguments.
  */
-static inline void
+static PRISM_INLINE void
 parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
     if (arguments->arguments == NULL) {
         arguments->arguments = pm_arguments_node_create(parser);
     }
 
-    pm_arguments_node_arguments_append(arguments->arguments, argument);
+    pm_arguments_node_arguments_append(parser->arena, arguments->arguments, argument);
 }
 
 /**
  * Parse a list of arguments.
  */
 static void
-parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
+parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint8_t flags, uint16_t depth) {
     pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
 
     // First we need to check if the next token is one that could be the start
@@ -14210,16 +13786,16 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
                 }
 
                 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
-                argument = (pm_node_t *) hash;
+                argument = UP(hash);
 
                 pm_static_literals_t hash_keys = { 0 };
-                bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash, (uint16_t) (depth + 1));
+                bool contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(hash), (uint16_t) (depth + 1));
 
                 parse_arguments_append(parser, arguments, argument);
 
-                pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
-                if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
-                pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
+                pm_node_flags_t node_flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
+                if (contains_keyword_splat) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
+                pm_node_flag_set(UP(arguments->arguments), node_flags);
 
                 pm_static_literals_free(&hash_keys);
                 parsed_bare_hash = true;
@@ -14232,12 +13808,12 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
                 pm_node_t *expression = NULL;
 
                 if (token_begins_expression_p(parser->current.type)) {
-                    expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
+                    expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
                 } else {
                     pm_parser_scope_forwarding_block_check(parser, &operator);
                 }
 
-                argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
+                argument = UP(pm_block_argument_node_create(parser, &operator, expression));
                 if (parsed_block_argument) {
                     parse_arguments_append(parser, arguments, argument);
                 } else {
@@ -14257,18 +13833,18 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
 
                 if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
                     pm_parser_scope_forwarding_positionals_check(parser, &operator);
-                    argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
+                    argument = UP(pm_splat_node_create(parser, &operator, NULL));
                     if (parsed_bare_hash) {
                         pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
                     }
                 } else {
-                    pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
+                    pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
 
                     if (parsed_bare_hash) {
-                        pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
+                        pm_parser_err(parser, PM_TOKEN_START(parser, &operator), PM_NODE_END(expression) - PM_TOKEN_START(parser, &operator), PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
                     }
 
-                    argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
+                    argument = UP(pm_splat_node_create(parser, &operator, expression));
                 }
 
                 parse_arguments_append(parser, arguments, argument);
@@ -14283,26 +13859,26 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
                         // not actually argument forwarding but was instead a
                         // range.
                         pm_token_t operator = parser->previous;
-                        pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                        pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
 
                         // If we parse a range, we need to validate that we
                         // didn't accidentally violate the nonassoc rules of the
                         // ... operator.
                         if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
                             pm_range_node_t *range = (pm_range_node_t *) right;
-                            pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
+                            pm_parser_err(parser, range->operator_loc.start, range->operator_loc.length, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
                         }
 
-                        argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
+                        argument = UP(pm_range_node_create(parser, NULL, &operator, right));
                     } else {
                         pm_parser_scope_forwarding_all_check(parser, &parser->previous);
                         if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
                             pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
                         }
 
-                        argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
+                        argument = UP(pm_forwarding_arguments_node_create(parser, &parser->previous));
                         parse_arguments_append(parser, arguments, argument);
-                        pm_node_flag_set((pm_node_t *) arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
+                        pm_node_flag_set(UP(arguments->arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
                         arguments->has_forwarding = true;
                         parsed_forwarding_arguments = true;
                         break;
@@ -14312,22 +13888,20 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
             PRISM_FALLTHROUGH
             default: {
                 if (argument == NULL) {
-                    argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
+                    argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (!parsed_first_argument ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0u) | PM_PARSE_ACCEPTS_LABEL), PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
                 }
 
                 bool contains_keywords = false;
                 bool contains_keyword_splat = false;
 
-                if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
+                if (argument_allowed_for_bare_hash(parser, argument)) {
                     if (parsed_bare_hash) {
                         pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
                     }
 
-                    pm_token_t operator;
+                    pm_token_t operator = { 0 };
                     if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
                         operator = parser->previous;
-                    } else {
-                        operator = not_provided(parser);
                     }
 
                     pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
@@ -14338,18 +13912,18 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
                     pm_hash_key_static_literals_add(parser, &hash_keys, argument);
 
                     // Finish parsing the one we are part way through.
-                    pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
-                    argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
+                    pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
+                    argument = UP(pm_assoc_node_create(parser, argument, NTOK2PTR(operator), value));
 
-                    pm_keyword_hash_node_elements_append(bare_hash, argument);
-                    argument = (pm_node_t *) bare_hash;
+                    pm_keyword_hash_node_elements_append(parser->arena, bare_hash, argument);
+                    argument = UP(bare_hash);
 
                     // Then parse more if we have a comma
                     if (accept1(parser, PM_TOKEN_COMMA) && (
                         token_begins_expression_p(parser->current.type) ||
                         match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
                     )) {
-                        contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash, (uint16_t) (depth + 1));
+                        contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(bare_hash), (uint16_t) (depth + 1));
                     }
 
                     pm_static_literals_free(&hash_keys);
@@ -14358,10 +13932,10 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
 
                 parse_arguments_append(parser, arguments, argument);
 
-                pm_node_flags_t flags = 0;
-                if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
-                if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
-                pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
+                pm_node_flags_t node_flags = 0;
+                if (contains_keywords) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
+                if (contains_keyword_splat) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
+                pm_node_flag_set(UP(arguments->arguments), node_flags);
 
                 break;
             }
@@ -14370,7 +13944,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
         parsed_first_argument = true;
 
         // If parsing the argument failed, we need to stop parsing arguments.
-        if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
+        if (PM_NODE_TYPE_P(argument, PM_ERROR_RECOVERY_NODE) || parser->recovering) break;
 
         // If the terminator of these arguments is not EOF, then we have a
         // specific token we're looking for. In that case we can accept a
@@ -14390,6 +13964,17 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
             if (accepted_newline) {
                 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
             }
+
+            // If this is a command call and an argument takes a block,
+            // there can be no further arguments. For example,
+            // `foo(bar 1 do end, 2)` should be rejected.
+            if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
+                pm_call_node_t *call = (pm_call_node_t *) argument;
+                if (call->opening_loc.length == 0 && call->arguments != NULL && call->block != NULL) {
+                    pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
+                    break;
+                }
+            }
         } else {
             // If there is no comma at the end of the argument list then we're
             // done parsing arguments and can break out of this loop.
@@ -14417,7 +14002,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
     expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
 
     pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
-    pm_multi_target_node_opening_set(node, &parser->previous);
+    pm_multi_target_node_opening_set(parser, node, &parser->previous);
 
     do {
         pm_node_t *param;
@@ -14427,33 +14012,33 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
         // commas, so here we'll assume this is a mistake of the user not
         // knowing it's not allowed here.
         if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
-            param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
+            param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
             pm_multi_target_node_targets_append(parser, node, param);
             pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
             break;
         }
 
         if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
-            param = (pm_node_t *) parse_required_destructured_parameter(parser);
+            param = UP(parse_required_destructured_parameter(parser));
         } else if (accept1(parser, PM_TOKEN_USTAR)) {
             pm_token_t star = parser->previous;
             pm_node_t *value = NULL;
 
             if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
                 pm_token_t name = parser->previous;
-                value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
+                value = UP(pm_required_parameter_node_create(parser, &name));
                 if (pm_parser_parameter_name_check(parser, &name)) {
                     pm_node_flag_set_repeated_parameter(value);
                 }
                 pm_parser_local_add_token(parser, &name, 1);
             }
 
-            param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
+            param = UP(pm_splat_node_create(parser, &star, value));
         } else {
             expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
             pm_token_t name = parser->previous;
 
-            param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
+            param = UP(pm_required_parameter_node_create(parser, &name));
             if (pm_parser_parameter_name_check(parser, &name)) {
                 pm_node_flag_set_repeated_parameter(param);
             }
@@ -14465,7 +14050,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
 
     accept1(parser, PM_TOKEN_NEWLINE);
     expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
-    pm_multi_target_node_closing_set(node, &parser->previous);
+    pm_multi_target_node_closing_set(parser, node, &parser->previous);
 
     return node;
 }
@@ -14541,6 +14126,43 @@ update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_ord
     return true;
 }
 
+static PRISM_INLINE void // Decide what a comma directly before the end of a parameter list means.
+parse_parameters_handle_trailing_comma(
+    pm_parser_t *parser, // errors are reported against its previous token (the comma, per the caller)
+    pm_parameters_node_t *params, // list being built; may receive an implicit rest node
+    pm_parameters_order_t order, // parameter-order state accumulated before the comma
+    bool in_block, // selects the block rules (`|bar,|`) over the `def` rules below
+    bool allows_trailing_comma // when false, the comma is always reported as an error
+) {
+    if (!allows_trailing_comma) {
+        pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
+        return;
+    }
+
+    if (in_block) {
+        if (order >= PM_PARAMETERS_ORDER_NAMED) { // NOTE(review): presumably "no splat seen yet" - confirm enum order
+            // e.g. `foo do |bar,|; end`: the comma is recorded as an implicit rest node.
+            pm_node_t *param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
+
+            if (params->rest == NULL) {
+                pm_parameters_node_rest_set(params, param);
+            } else { // a rest is already set: report a duplicate splat, keep the node in posts
+                pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_SPLAT_MULTI);
+                pm_parameters_node_posts_append(parser->arena, params, UP(param));
+            }
+        } else {
+            // e.g. `foo do |*bar,|; end`: a trailing comma here is reported as an error.
+            pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
+        }
+    } else {
+        // https://bugs.ruby-lang.org/issues/19107
+        // From CRuby 4.1 on, allow `def foo(bar,); end`, `def foo(*bar,); end`, etc. but not `def foo(...,); end`; older versions always error.
+        if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1 || order == PM_PARAMETERS_ORDER_NOTHING_AFTER) {
+            pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
+        }
+    }
+}
+
 /**
  * Parse a list of parameters on a method definition.
  */
@@ -14553,6 +14175,7 @@ parse_parameters(
     bool allows_forwarding_parameters,
     bool accepts_blocks_in_defaults,
     bool in_block,
+    pm_diagnostic_id_t diag_id_forwarding,
     uint16_t depth
 ) {
     pm_do_loop_stack_push(parser, false);
@@ -14566,12 +14189,12 @@ parse_parameters(
         switch (parser->current.type) {
             case PM_TOKEN_PARENTHESIS_LEFT: {
                 update_parameter_state(parser, &parser->current, &order);
-                pm_node_t *param = (pm_node_t *) parse_required_destructured_parameter(parser);
+                pm_node_t *param = UP(parse_required_destructured_parameter(parser));
 
                 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
-                    pm_parameters_node_requireds_append(params, param);
+                    pm_parameters_node_requireds_append(parser->arena, params, param);
                 } else {
-                    pm_parameters_node_posts_append(params, param);
+                    pm_parameters_node_posts_append(parser->arena, params, param);
                 }
                 break;
             }
@@ -14581,34 +14204,40 @@ parse_parameters(
                 parser_lex(parser);
 
                 pm_token_t operator = parser->previous;
-                pm_token_t name;
+                pm_node_t *param;
 
-                bool repeated = false;
-                if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
-                    name = parser->previous;
-                    repeated = pm_parser_parameter_name_check(parser, &name);
-                    pm_parser_local_add_token(parser, &name, 1);
+                if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1 && accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
+                    param = (pm_node_t *) pm_no_block_parameter_node_create(parser, &operator, &parser->previous);
                 } else {
-                    name = not_provided(parser);
-                    parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
-                }
+                    pm_token_t name = {0};
 
-                pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
-                if (repeated) {
-                    pm_node_flag_set_repeated_parameter((pm_node_t *)param);
+                    bool repeated = false;
+                    if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
+                        name = parser->previous;
+                        repeated = pm_parser_parameter_name_check(parser, &name);
+                        pm_parser_local_add_token(parser, &name, 1);
+                    } else {
+                        parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
+                    }
+
+                    param = (pm_node_t *) pm_block_parameter_node_create(parser, NTOK2PTR(name), &operator);
+                    if (repeated) {
+                        pm_node_flag_set_repeated_parameter(param);
+                    }
                 }
+
                 if (params->block == NULL) {
                     pm_parameters_node_block_set(params, param);
                 } else {
-                    pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
-                    pm_parameters_node_posts_append(params, (pm_node_t *) param);
+                    pm_parser_err_node(parser, param, PM_ERR_PARAMETER_BLOCK_MULTI);
+                    pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, param)));
                 }
 
                 break;
             }
             case PM_TOKEN_UDOT_DOT_DOT: {
                 if (!allows_forwarding_parameters) {
-                    pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
+                    pm_parser_err_current(parser, diag_id_forwarding);
                 }
 
                 bool succeeded = update_parameter_state(parser, &parser->current, &order);
@@ -14621,12 +14250,12 @@ parse_parameters(
                     // If we already have a keyword rest parameter, then we replace it with the
                     // forwarding parameter and move the keyword rest parameter to the posts list.
                     pm_node_t *keyword_rest = params->keyword_rest;
-                    pm_parameters_node_posts_append(params, keyword_rest);
+                    pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, keyword_rest)));
                     if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
                     params->keyword_rest = NULL;
                 }
 
-                pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
+                pm_parameters_node_keyword_rest_set(params, UP(param));
                 break;
             }
             case PM_TOKEN_CLASS_VARIABLE:
@@ -14671,24 +14300,24 @@ parse_parameters(
                     parser_lex(parser);
 
                     pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
-                    uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
+                    uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
 
                     if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
-                    pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
+                    pm_node_t *value = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
                     if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
 
                     pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
 
                     if (repeated) {
-                        pm_node_flag_set_repeated_parameter((pm_node_t *) param);
+                        pm_node_flag_set_repeated_parameter(UP(param));
                     }
-                    pm_parameters_node_optionals_append(params, param);
+                    pm_parameters_node_optionals_append(parser->arena, params, param);
 
                     // If the value of the parameter increased the number of
                     // reads of that parameter, then we need to warn that we
                     // have a circular definition.
-                    if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
-                        PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
+                    if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
+                        PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &name, PM_ERR_PARAMETER_CIRCULAR);
                     }
 
                     context_pop(parser);
@@ -14703,15 +14332,15 @@ parse_parameters(
                 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
                     pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
                     if (repeated) {
-                        pm_node_flag_set_repeated_parameter((pm_node_t *)param);
+                        pm_node_flag_set_repeated_parameter(UP(param));
                     }
-                    pm_parameters_node_requireds_append(params, (pm_node_t *) param);
+                    pm_parameters_node_requireds_append(parser->arena, params, UP(param));
                 } else {
                     pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
                     if (repeated) {
-                        pm_node_flag_set_repeated_parameter((pm_node_t *)param);
+                        pm_node_flag_set_repeated_parameter(UP(param));
                     }
-                    pm_parameters_node_posts_append(params, (pm_node_t *) param);
+                    pm_parameters_node_posts_append(parser->arena, params, UP(param));
                 }
 
                 break;
@@ -14728,9 +14357,9 @@ parse_parameters(
                 local.end -= 1;
 
                 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
-                    pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
+                    pm_parser_err(parser, PM_TOKEN_START(parser, &local), PM_TOKEN_LENGTH(&local), PM_ERR_ARGUMENT_FORMAL_CONSTANT);
                 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
-                    PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
+                    PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
                 }
 
                 bool repeated = pm_parser_parameter_name_check(parser, &local);
@@ -14742,12 +14371,12 @@ parse_parameters(
                     case PM_TOKEN_PIPE: {
                         context_pop(parser);
 
-                        pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
+                        pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
                         if (repeated) {
                             pm_node_flag_set_repeated_parameter(param);
                         }
 
-                        pm_parameters_node_keywords_append(params, param);
+                        pm_parameters_node_keywords_append(parser->arena, params, param);
                         break;
                     }
                     case PM_TOKEN_SEMICOLON:
@@ -14759,12 +14388,12 @@ parse_parameters(
                             break;
                         }
 
-                        pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
+                        pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
                         if (repeated) {
                             pm_node_flag_set_repeated_parameter(param);
                         }
 
-                        pm_parameters_node_keywords_append(params, param);
+                        pm_parameters_node_keywords_append(parser->arena, params, param);
                         break;
                     }
                     default: {
@@ -14772,20 +14401,20 @@ parse_parameters(
 
                         if (token_begins_expression_p(parser->current.type)) {
                             pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
-                            uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
+                            uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
 
                             if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
-                            pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
+                            pm_node_t *value = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
                             if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
 
-                            if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
-                                PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
+                            if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
+                                PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_PARAMETER_CIRCULAR);
                             }
 
-                            param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
+                            param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value));
                         }
                         else {
-                            param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
+                            param = UP(pm_required_keyword_parameter_node_create(parser, &name));
                         }
 
                         if (repeated) {
@@ -14793,7 +14422,7 @@ parse_parameters(
                         }
 
                         context_pop(parser);
-                        pm_parameters_node_keywords_append(params, param);
+                        pm_parameters_node_keywords_append(parser->arena, params, param);
 
                         // If parsing the value of the parameter resulted in error recovery,
                         // then we can put a missing node in its place and stop parsing the
@@ -14814,7 +14443,7 @@ parse_parameters(
                 parser_lex(parser);
 
                 pm_token_t operator = parser->previous;
-                pm_token_t name;
+                pm_token_t name = { 0 };
                 bool repeated = false;
 
                 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
@@ -14822,11 +14451,10 @@ parse_parameters(
                     repeated = pm_parser_parameter_name_check(parser, &name);
                     pm_parser_local_add_token(parser, &name, 1);
                 } else {
-                    name = not_provided(parser);
                     parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
                 }
 
-                pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
+                pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, NTOK2PTR(name)));
                 if (repeated) {
                     pm_node_flag_set_repeated_parameter(param);
                 }
@@ -14835,7 +14463,7 @@ parse_parameters(
                     pm_parameters_node_rest_set(params, param);
                 } else {
                     pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
-                    pm_parameters_node_posts_append(params, param);
+                    pm_parameters_node_posts_append(parser->arena, params, param);
                 }
 
                 break;
@@ -14854,9 +14482,9 @@ parse_parameters(
                         pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
                     }
 
-                    param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
+                    param = UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
                 } else {
-                    pm_token_t name;
+                    pm_token_t name = { 0 };
 
                     bool repeated = false;
                     if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
@@ -14864,11 +14492,10 @@ parse_parameters(
                         repeated = pm_parser_parameter_name_check(parser, &name);
                         pm_parser_local_add_token(parser, &name, 1);
                     } else {
-                        name = not_provided(parser);
                         parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
                     }
 
-                    param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
+                    param = UP(pm_keyword_rest_parameter_node_create(parser, &operator, NTOK2PTR(name)));
                     if (repeated) {
                         pm_node_flag_set_repeated_parameter(param);
                     }
@@ -14878,27 +14505,14 @@ parse_parameters(
                     pm_parameters_node_keyword_rest_set(params, param);
                 } else {
                     pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
-                    pm_parameters_node_posts_append(params, param);
+                    pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, param)));
                 }
 
                 break;
             }
             default:
                 if (parser->previous.type == PM_TOKEN_COMMA) {
-                    if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
-                        // If we get here, then we have a trailing comma in a
-                        // block parameter list.
-                        pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
-
-                        if (params->rest == NULL) {
-                            pm_parameters_node_rest_set(params, param);
-                        } else {
-                            pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
-                            pm_parameters_node_posts_append(params, (pm_node_t *) param);
-                        }
-                    } else {
-                        pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
-                    }
+                    parse_parameters_handle_trailing_comma(parser, params, order, in_block, allows_trailing_comma);
                 }
 
                 parsing = false;
@@ -14930,8 +14544,7 @@ parse_parameters(
     pm_do_loop_stack_pop(parser);
 
     // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
-    if (params->base.location.start == params->base.location.end) {
-        pm_node_destroy(parser, (pm_node_t *) params);
+    if (PM_NODE_START(params) == PM_NODE_END(params)) {
         return NULL;
     }
 
@@ -14948,13 +14561,13 @@ token_newline_index(const pm_parser_t *parser) {
         // This is the common case. In this case we can look at the previously
         // recorded newline in the newline list and subtract from the current
         // offset.
-        return parser->newline_list.size - 1;
+        return parser->line_offsets.size - 1;
     } else {
         // This is unlikely. This is the case that we have already parsed the
         // start of a heredoc, so we cannot rely on looking at the previous
         // offset of the newline list, and instead must go through the whole
         // process of a binary search for the line number.
-        return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
+        return (size_t) pm_line_offset_list_line(&parser->line_offsets, PM_TOKEN_START(parser, &parser->current), 0);
     }
 }
 
@@ -14964,7 +14577,7 @@ token_newline_index(const pm_parser_t *parser) {
  */
 static int64_t
 token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
-    const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
+    const uint8_t *cursor = parser->start + parser->line_offsets.offsets[newline_index];
     const uint8_t *end = token->start;
 
     // Skip over the BOM if it is present.
@@ -15028,8 +14641,8 @@ parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_ind
     // Otherwise, add a warning.
     PM_PARSER_WARN_FORMAT(
         parser,
-        closing_token->start,
-        closing_token->end,
+        PM_TOKEN_START(parser, closing_token),
+        PM_TOKEN_LENGTH(closing_token),
         PM_WARN_INDENTATION_MISMATCH,
         (int) (closing_token->end - closing_token->start),
         (const char *) closing_token->start,
@@ -15053,7 +14666,7 @@ typedef enum {
  * Parse any number of rescue clauses. This will form a linked list of if
  * nodes pointing to each other from the top.
  */
-static inline void
+static PRISM_INLINE void
 parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
     pm_rescue_node_t *current = NULL;
 
@@ -15069,9 +14682,9 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
                 // we're going to have an empty list of exceptions to rescue (which
                 // implies StandardError).
                 parser_lex(parser);
-                pm_rescue_node_operator_set(rescue, &parser->previous);
+                pm_rescue_node_operator_set(parser, rescue, &parser->previous);
 
-                pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
+                pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
                 reference = parse_target(parser, reference, false, false);
 
                 pm_rescue_node_reference_set(rescue, reference);
@@ -15090,7 +14703,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
 
                     do {
                         pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
-                        pm_rescue_node_exceptions_append(rescue, expression);
+                        pm_rescue_node_exceptions_append(parser->arena, rescue, expression);
 
                         // If we hit a newline, then this is the end of the rescue expression. We
                         // can continue on to parse the statements.
@@ -15099,9 +14712,9 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
                         // If we hit a `=>` then we're going to parse the exception variable. Once
                         // we've done that, we'll break out of the loop and parse the statements.
                         if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
-                            pm_rescue_node_operator_set(rescue, &parser->previous);
+                            pm_rescue_node_operator_set(parser, rescue, &parser->previous);
 
-                            pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
+                            pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
                             reference = parse_target(parser, reference, false, false);
 
                             pm_rescue_node_reference_set(rescue, reference);
@@ -15114,11 +14727,11 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
 
         if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
             if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
-                rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
+                rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous);
             }
         } else {
             expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
-            rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
+            rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous);
         }
 
         if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
@@ -15156,11 +14769,10 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
     // since we won't know the end until we've found all subsequent
     // clauses. This sets the end location on all rescues once we know it.
     if (current != NULL) {
-        const uint8_t *end_to_set = current->base.location.end;
         pm_rescue_node_t *clause = parent_node->rescue_clause;
 
         while (clause != NULL) {
-            clause->base.location.end = end_to_set;
+            PM_NODE_LENGTH_SET_NODE(clause, current);
             clause = clause->subsequent;
         }
     }
@@ -15203,7 +14815,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
 
         // If we don't have a `current` rescue node, then this is a dangling
         // else, and it's an error.
-        if (current == NULL) pm_parser_err_node(parser, (pm_node_t *) else_clause, PM_ERR_BEGIN_LONELY_ELSE);
+        if (current == NULL) pm_parser_err_node(parser, UP(else_clause), PM_ERR_BEGIN_LONELY_ELSE);
     }
 
     if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
@@ -15241,10 +14853,10 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
 
     if (match1(parser, PM_TOKEN_KEYWORD_END)) {
         if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
-        pm_begin_node_end_keyword_set(parent_node, &parser->current);
+        pm_begin_node_end_keyword_set(parser, parent_node, &parser->current);
     } else {
-        pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
-        pm_begin_node_end_keyword_set(parent_node, &end_keyword);
+        pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_KEYWORD_END, .start = parser->previous.end, .end = parser->previous.end };
+        pm_begin_node_end_keyword_set(parser, parent_node, &end_keyword);
     }
 }
 
@@ -15254,11 +14866,11 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
  */
 static pm_begin_node_t *
 parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
-    pm_token_t begin_keyword = not_provided(parser);
-    pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
-
+    pm_begin_node_t *node = pm_begin_node_create(parser, NULL, statements); // NULL: there is no `begin` keyword token to pass
     parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
-    node->base.location.start = start;
+
+    node->base.location.start = U32(start - parser->start); // start is stored as an offset from the beginning of the source
+    PM_NODE_LENGTH_SET_TOKEN(parser, node, &parser->current); // NOTE(review): presumably re-derives the length up to the current token - confirm
 
     return node;
 }
@@ -15277,6 +14889,9 @@ parse_block_parameters(
 ) {
     pm_parameters_node_t *parameters = NULL;
     if (!match1(parser, PM_TOKEN_SEMICOLON)) {
+        if (!is_lambda_literal) {
+            context_push(parser, PM_CONTEXT_BLOCK_PARAMETERS);
+        }
         parameters = parse_parameters(
             parser,
             is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
@@ -15285,12 +14900,16 @@ parse_block_parameters(
             false,
             accepts_blocks_in_defaults,
             true,
+            is_lambda_literal ? PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_LAMBDA : PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_BLOCK,
             (uint16_t) (depth + 1)
         );
+        if (!is_lambda_literal) {
+            context_pop(parser);
+        }
     }
 
     pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
-    if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
+    if (opening != NULL) {
         accept1(parser, PM_TOKEN_NEWLINE);
 
         if (accept1(parser, PM_TOKEN_SEMICOLON)) {
@@ -15321,9 +14940,9 @@ parse_block_parameters(
                 pm_parser_local_add_token(parser, &parser->previous, 1);
 
                 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
-                if (repeated) pm_node_flag_set_repeated_parameter((pm_node_t *) local);
+                if (repeated) pm_node_flag_set_repeated_parameter(UP(local));
 
-                pm_block_parameters_node_append_local(block_parameters, local);
+                pm_block_parameters_node_append_local(parser->arena, block_parameters, local);
             } while (accept1(parser, PM_TOKEN_COMMA));
         }
     }
@@ -15403,8 +15022,8 @@ parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_
                 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
             } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
                 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
-            } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
-                numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
+            } else if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
+                numbered_parameter = MAX(numbered_parameter, (uint8_t) (parser->start[node->location.start + 1] - '0'));
             } else {
                 assert(false && "unreachable");
             }
@@ -15423,13 +15042,11 @@ parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_
         for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
             scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
         }
-
-        const pm_location_t location = { .start = opening->start, .end = closing->end };
-        return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
+        return UP(pm_numbered_parameters_node_create(parser, opening, closing, numbered_parameter));
     }
 
     if (it_parameter) {
-        return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
+        return UP(pm_it_parameters_node_create(parser, opening, closing));
     }
 
     return NULL;
@@ -15461,7 +15078,7 @@ parse_block(pm_parser_t *parser, uint16_t depth) {
             expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
         }
 
-        pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
+        pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous);
     }
 
     accept1(parser, PM_TOKEN_NEWLINE);
@@ -15469,30 +15086,30 @@ parse_block(pm_parser_t *parser, uint16_t depth) {
 
     if (opening.type == PM_TOKEN_BRACE_LEFT) {
         if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
-            statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1));
+            statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1)));
         }
 
-        expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
+        expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE, &opening);
     } else {
         if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
             if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
                 pm_accepts_block_stack_push(parser, true);
-                statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1));
+                statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1)));
                 pm_accepts_block_stack_pop(parser);
             }
 
             if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
                 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
-                statements = (pm_node_t *) parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1));
+                statements = UP(parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1)));
             }
         }
 
-        expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
+        expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END, &opening);
     }
 
     pm_constant_id_list_t locals;
     pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
-    pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous);
+    pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &opening, &parser->previous);
 
     pm_parser_scope_pop(parser);
     pm_accepts_block_stack_pop(parser);
@@ -15506,42 +15123,54 @@ parse_block(pm_parser_t *parser, uint16_t depth) {
  * arguments, or blocks).
  */
 static bool
-parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
+parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, uint8_t flags, uint16_t depth) {
+    /* Fast path: if the current token can't begin an expression and isn't
+     * a left parenthesis, a `do` keyword, or a splat, double-splat, or
+     * block-pass operator, there are no arguments to parse. */
+    if (
+        !token_begins_expression_p(parser->current.type) &&
+        !match6(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)
+    ) {
+        return false;
+    }
+
     bool found = false;
+    bool parsed_command_args = false;
 
     if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
         found |= true;
-        arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+        arguments->opening_loc = TOK2LOC(parser, &parser->previous);
 
         if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
-            arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+            arguments->closing_loc = TOK2LOC(parser, &parser->previous);
         } else {
             pm_accepts_block_stack_push(parser, true);
-            parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
+            parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint8_t) (flags & ~PM_PARSE_ACCEPTS_DO_BLOCK), (uint16_t) (depth + 1));
 
             if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
-                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_str(parser->current.type));
                 parser->previous.start = parser->previous.end;
-                parser->previous.type = PM_TOKEN_MISSING;
+                parser->previous.type = 0;
             }
 
             pm_accepts_block_stack_pop(parser);
-            arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+            arguments->closing_loc = TOK2LOC(parser, &parser->previous);
         }
-    } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
+    } else if ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
         found |= true;
+        parsed_command_args = true;
         pm_accepts_block_stack_push(parser, false);
 
         // If we get here, then the subsequent token cannot be used as an infix
         // operator. In this case we assume the subsequent token is part of an
         // argument to this method call.
-        parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
+        parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, flags, (uint16_t) (depth + 1));
 
         // If we have done with the arguments and still not consumed the comma,
         // then we have a trailing comma where we need to check whether it is
         // allowed or not.
         if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
-            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_str(parser->current.type));
         }
 
         pm_accepts_block_stack_pop(parser);
@@ -15560,21 +15189,24 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
         } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
             found |= true;
             block = parse_block(parser, (uint16_t) (depth + 1));
+        } else if (parsed_command_args && pm_accepts_block_stack_p(parser) && (flags & PM_PARSE_ACCEPTS_DO_BLOCK) && accept1(parser, PM_TOKEN_KEYWORD_DO_BLOCK)) {
+            found |= true;
+            block = parse_block(parser, (uint16_t) (depth + 1));
         }
 
         if (block != NULL) {
             if (arguments->block == NULL && !arguments->has_forwarding) {
-                arguments->block = (pm_node_t *) block;
+                arguments->block = UP(block);
             } else {
-                pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
+                pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_BLOCK_MULTI);
 
                 if (arguments->block != NULL) {
                     if (arguments->arguments == NULL) {
                         arguments->arguments = pm_arguments_node_create(parser);
                     }
-                    pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
+                    pm_arguments_node_arguments_append(parser->arena, arguments->arguments, arguments->block);
                 }
-                arguments->block = (pm_node_t *) block;
+                arguments->block = UP(block);
             }
         }
     }
@@ -15642,6 +15274,7 @@ parse_return(pm_parser_t *parser, pm_node_t *node) {
             case PM_CONTEXT_BLOCK_ENSURE:
             case PM_CONTEXT_BLOCK_KEYWORDS:
             case PM_CONTEXT_BLOCK_RESCUE:
+            case PM_CONTEXT_BLOCK_PARAMETERS:
             case PM_CONTEXT_DEF_ELSE:
             case PM_CONTEXT_DEF_ENSURE:
             case PM_CONTEXT_DEF_PARAMS:
@@ -15661,7 +15294,7 @@ parse_return(pm_parser_t *parser, pm_node_t *node) {
                 break;
         }
     }
-    if (in_sclass) {
+    if (in_sclass && parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
         pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
     }
 }
@@ -15678,6 +15311,7 @@ parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
             case PM_CONTEXT_BLOCK_KEYWORDS:
             case PM_CONTEXT_BLOCK_ELSE:
             case PM_CONTEXT_BLOCK_ENSURE:
+            case PM_CONTEXT_BLOCK_PARAMETERS:
             case PM_CONTEXT_BLOCK_RESCUE:
             case PM_CONTEXT_DEFINED:
             case PM_CONTEXT_FOR:
@@ -15687,12 +15321,19 @@ parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
             case PM_CONTEXT_LAMBDA_ENSURE:
             case PM_CONTEXT_LAMBDA_RESCUE:
             case PM_CONTEXT_LOOP_PREDICATE:
-            case PM_CONTEXT_POSTEXE:
             case PM_CONTEXT_UNTIL:
             case PM_CONTEXT_WHILE:
                 // These are the good cases. We're allowed to have a block exit
                 // in these contexts.
                 return;
+            case PM_CONTEXT_POSTEXE:
+                // Before CRuby 4.1 a block exit is still allowed in this context; from 4.1 on, fall through. See https://bugs.ruby-lang.org/issues/20409
+                if (context_node->context == PM_CONTEXT_POSTEXE) {
+                    if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
+                        return;
+                    }
+                }
+            PRISM_FALLTHROUGH
             case PM_CONTEXT_DEF:
             case PM_CONTEXT_DEF_PARAMS:
             case PM_CONTEXT_DEF_ELSE:
@@ -15714,7 +15355,7 @@ parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
                 // block exit to the list of exits for the expression, and
                 // the node parsing will handle validating it instead.
                 assert(parser->current_block_exits != NULL);
-                pm_node_list_append(parser->current_block_exits, node);
+                pm_node_list_append(parser->arena, parser->current_block_exits, node);
                 return;
             case PM_CONTEXT_BEGIN_ELSE:
             case PM_CONTEXT_BEGIN_ENSURE:
@@ -15805,7 +15446,7 @@ pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
         // However, they could still become valid in a higher level context if
         // there is another list above this one. In this case we'll push all of
         // the block exits up to the previous list.
-        pm_node_list_concat(previous_block_exits, parser->current_block_exits);
+        pm_node_list_concat(parser->arena, previous_block_exits, parser->current_block_exits);
         parser->current_block_exits = previous_block_exits;
     } else {
         // If we did not match a trailing while/until and this was the last
@@ -15815,11 +15456,11 @@ pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
     }
 }
 
-static inline pm_node_t *
+static PRISM_INLINE pm_node_t *
 parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
     context_push(parser, PM_CONTEXT_PREDICATE);
     pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
-    pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
+    pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, error_id, (uint16_t) (depth + 1));
 
     // Predicates are closed by a term, a "then", or a term and then a "then".
     bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
@@ -15837,13 +15478,13 @@ parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_contex
     return predicate;
 }
 
-static inline pm_node_t *
+static PRISM_INLINE pm_node_t *
 parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
     pm_node_list_t current_block_exits = { 0 };
     pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
 
     pm_token_t keyword = parser->previous;
-    pm_token_t then_keyword = not_provided(parser);
+    pm_token_t then_keyword = { 0 };
 
     pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
     pm_statements_node_t *statements = NULL;
@@ -15855,15 +15496,14 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
         accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
     }
 
-    pm_token_t end_keyword = not_provided(parser);
     pm_node_t *parent = NULL;
 
     switch (context) {
         case PM_CONTEXT_IF:
-            parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
+            parent = UP(pm_if_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
             break;
         case PM_CONTEXT_UNLESS:
-            parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
+            parent = UP(pm_unless_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements));
             break;
         default:
             assert(false && "unreachable");
@@ -15877,7 +15517,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
     if (context == PM_CONTEXT_IF) {
         while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
             if (parser_end_of_line_p(parser)) {
-                PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
+                PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL);
             }
 
             parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
@@ -15891,7 +15531,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
             pm_accepts_block_stack_pop(parser);
             accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
 
-            pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
+            pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
             ((pm_if_node_t *) current)->subsequent = elsif;
             current = elsif;
         }
@@ -15910,13 +15550,13 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
 
         accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
         parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
-        expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
+        expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE, &keyword);
 
         pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
 
         switch (context) {
             case PM_CONTEXT_IF:
-                ((pm_if_node_t *) current)->subsequent = (pm_node_t *) else_node;
+                ((pm_if_node_t *) current)->subsequent = UP(else_node);
                 break;
             case PM_CONTEXT_UNLESS:
                 ((pm_unless_node_t *) parent)->else_clause = else_node;
@@ -15927,7 +15567,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
         }
     } else {
         parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
-        expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
+        expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM, &keyword);
     }
 
     // Set the appropriate end location for all of the nodes in the subtree.
@@ -15939,12 +15579,12 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
             while (recursing) {
                 switch (PM_NODE_TYPE(current)) {
                     case PM_IF_NODE:
-                        pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
+                        pm_if_node_end_keyword_loc_set(parser, (pm_if_node_t *) current, &parser->previous);
                         current = ((pm_if_node_t *) current)->subsequent;
                         recursing = current != NULL;
                         break;
                     case PM_ELSE_NODE:
-                        pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
+                        pm_else_node_end_keyword_loc_set(parser, (pm_else_node_t *) current, &parser->previous);
                         recursing = false;
                         break;
                     default: {
@@ -15956,7 +15596,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
             break;
         }
         case PM_CONTEXT_UNLESS:
-            pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
+            pm_unless_node_end_keyword_loc_set(parser, (pm_unless_node_t *) parent, &parser->previous);
             break;
         default:
             assert(false && "unreachable");
@@ -15964,8 +15604,6 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
     }
 
     pop_block_exits(parser, previous_block_exits);
-    pm_node_list_free(&current_block_exits);
-
     return parent;
 }
 
@@ -15976,7 +15614,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
 #define PM_CASE_KEYWORD PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
     case PM_TOKEN_KEYWORD_ALIAS: case PM_TOKEN_KEYWORD_AND: case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
     case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: case PM_TOKEN_KEYWORD_DEF: \
-    case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \
+    case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_BLOCK: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \
     case PM_TOKEN_KEYWORD_ELSIF: case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
     case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: case PM_TOKEN_KEYWORD_IF: case PM_TOKEN_KEYWORD_IN: \
     case PM_TOKEN_KEYWORD_MODULE: case PM_TOKEN_KEYWORD_NEXT: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: \
@@ -16039,7 +15677,7 @@ PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int
  * If the encoding was explicitly set through the lexing process, then we need
  * to potentially mark the string's flags to indicate how to encode it.
  */
-static inline pm_node_flags_t
+static PRISM_INLINE pm_node_flags_t
 parse_unescaped_encoding(const pm_parser_t *parser) {
     if (parser->explicit_encoding != NULL) {
         if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
@@ -16071,10 +15709,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) {
         //     "aaa #{bbb} #@ccc ddd"
         //      ^^^^      ^     ^^^^
         case PM_TOKEN_STRING_CONTENT: {
-            pm_token_t opening = not_provided(parser);
-            pm_token_t closing = not_provided(parser);
-
-            pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
+            pm_node_t *node = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
             pm_node_flag_set(node, parse_unescaped_encoding(parser));
 
             parser_lex(parser);
@@ -16101,7 +15736,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) {
             pm_token_t opening = parser->previous;
             pm_statements_node_t *statements = NULL;
 
-            if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
+            if (!match3(parser, PM_TOKEN_EMBEXPR_END, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
                 pm_accepts_block_stack_push(parser, true);
                 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
                 pm_accepts_block_stack_pop(parser);
@@ -16109,9 +15744,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) {
 
             parser->brace_nesting = brace_nesting;
             lex_state_set(parser, state);
-
             expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
-            pm_token_t closing = parser->previous;
 
             // If this set of embedded statements only contains a single
             // statement, then Ruby does not consider it as a possible statement
@@ -16120,7 +15753,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) {
                 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
             }
 
-            return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
+            return UP(pm_embedded_statements_node_create(parser, &opening, statements, &parser->previous));
         }
 
         // Here the lexer has returned the beginning of an embedded variable.
@@ -16145,42 +15778,42 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) {
                 // create a global variable read node.
                 case PM_TOKEN_BACK_REFERENCE:
                     parser_lex(parser);
-                    variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
+                    variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
                     break;
                 // In this case an nth reference is being interpolated. We'll
                 // create a global variable read node.
                 case PM_TOKEN_NUMBERED_REFERENCE:
                     parser_lex(parser);
-                    variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
+                    variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
                     break;
                 // In this case a global variable is being interpolated. We'll
                 // create a global variable read node.
                 case PM_TOKEN_GLOBAL_VARIABLE:
                     parser_lex(parser);
-                    variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
+                    variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
                     break;
                 // In this case an instance variable is being interpolated.
                 // We'll create an instance variable read node.
                 case PM_TOKEN_INSTANCE_VARIABLE:
                     parser_lex(parser);
-                    variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
+                    variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
                     break;
                 // In this case a class variable is being interpolated. We'll
                 // create a class variable read node.
                 case PM_TOKEN_CLASS_VARIABLE:
                     parser_lex(parser);
-                    variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
+                    variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
                     break;
                 // We can hit here if we got an invalid token. In that case
                 // we'll not attempt to lex this token and instead just return a
                 // missing node.
                 default:
                     expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
-                    variable = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+                    variable = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
                     break;
             }
 
-            return (pm_node_t *) pm_embedded_variable_node_create(parser, &operator, variable);
+            return UP(pm_embedded_variable_node_create(parser, &operator, variable));
         }
         default:
             parser_lex(parser);
@@ -16208,18 +15841,16 @@ parse_operator_symbol_name(const pm_token_t *name) {
 
 static pm_node_t *
 parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
-    pm_token_t closing = not_provided(parser);
-    pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
-
+    pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, NULL);
     const uint8_t *end = parse_operator_symbol_name(&parser->current);
 
     if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
     parser_lex(parser);
 
     pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
-    pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
+    pm_node_flag_set(UP(symbol), PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
 
-    return (pm_node_t *) symbol;
+    return UP(symbol);
 }
 
 /**
@@ -16253,13 +15884,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
                 break;
         }
 
-        pm_token_t closing = not_provided(parser);
-        pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
-
+        pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, NULL);
         pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
-        pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
+        pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
 
-        return (pm_node_t *) symbol;
+        return UP(symbol);
     }
 
     if (lex_mode->as.string.interpolation) {
@@ -16267,10 +15896,13 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
         if (match1(parser, PM_TOKEN_STRING_END)) {
             if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
             parser_lex(parser);
+            pm_token_t content = {
+                .type = PM_TOKEN_STRING_CONTENT,
+                .start = parser->previous.start,
+                .end = parser->previous.start
+            };
 
-            pm_token_t content = not_provided(parser);
-            pm_token_t closing = parser->previous;
-            return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
+            return UP(pm_symbol_node_create(parser, &opening, &content, &parser->previous));
         }
 
         // Now we can parse the first part of the symbol.
@@ -16282,15 +15914,15 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
             if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
             expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
 
-            return (pm_node_t *) pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous);
+            return UP(pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous));
         }
 
         pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
-        if (part) pm_interpolated_symbol_node_append(symbol, part);
+        if (part) pm_interpolated_symbol_node_append(parser->arena, symbol, part);
 
         while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
             if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
-                pm_interpolated_symbol_node_append(symbol, part);
+                pm_interpolated_symbol_node_append(parser->arena, symbol, part);
             }
         }
 
@@ -16301,8 +15933,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
             expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
         }
 
-        pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
-        return (pm_node_t *) symbol;
+        pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous);
+        return UP(symbol);
     }
 
     pm_token_t content;
@@ -16324,13 +15956,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
         // interpolated string node, so that's what we'll do here.
         if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
             pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
-            pm_token_t bounds = not_provided(parser);
+            pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
+            pm_interpolated_symbol_node_append(parser->arena, symbol, part);
 
-            pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
-            pm_interpolated_symbol_node_append(symbol, part);
-
-            part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
-            pm_interpolated_symbol_node_append(symbol, part);
+            part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->current, NULL, &parser->current_string));
+            pm_interpolated_symbol_node_append(parser->arena, symbol, part);
 
             if (next_state != PM_LEX_STATE_NONE) {
                 lex_state_set(parser, next_state);
@@ -16339,8 +15969,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
             parser_lex(parser);
             expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
 
-            pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
-            return (pm_node_t *) symbol;
+            pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous);
+            return UP(symbol);
         }
     } else {
         content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
@@ -16357,34 +15987,29 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
         expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
     }
 
-    return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
+    return UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false)));
 }
 
 /**
  * Parse an argument to undef which can either be a bare word, a symbol, a
  * constant, or an interpolated symbol.
  */
-static inline pm_node_t *
+static PRISM_INLINE pm_node_t *
 parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
     switch (parser->current.type) {
-        case PM_CASE_OPERATOR: {
-            const pm_token_t opening = not_provided(parser);
-            return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
-        }
+        case PM_CASE_OPERATOR:
+            return parse_operator_symbol(parser, NULL, PM_LEX_STATE_NONE);
         case PM_CASE_KEYWORD:
         case PM_TOKEN_CONSTANT:
         case PM_TOKEN_IDENTIFIER:
         case PM_TOKEN_METHOD_NAME: {
             parser_lex(parser);
 
-            pm_token_t opening = not_provided(parser);
-            pm_token_t closing = not_provided(parser);
-            pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
-
+            pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL);
             pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
-            pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
+            pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
 
-            return (pm_node_t *) symbol;
+            return UP(symbol);
         }
         case PM_TOKEN_SYMBOL_BEGIN: {
             pm_lex_mode_t lex_mode = *parser->lex_modes.current;
@@ -16394,7 +16019,7 @@ parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
         }
         default:
             pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
-            return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+            return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
     }
 }
 
@@ -16404,13 +16029,11 @@ parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
  * we need to set the lex state to PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM
  * between the first and second arguments.
  */
-static inline pm_node_t *
+static PRISM_INLINE pm_node_t *
 parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
     switch (parser->current.type) {
-        case PM_CASE_OPERATOR: {
-            const pm_token_t opening = not_provided(parser);
-            return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
-        }
+        case PM_CASE_OPERATOR:
+            return parse_operator_symbol(parser, NULL, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
         case PM_CASE_KEYWORD:
         case PM_TOKEN_CONSTANT:
         case PM_TOKEN_IDENTIFIER:
@@ -16418,14 +16041,11 @@ parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
             if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
             parser_lex(parser);
 
-            pm_token_t opening = not_provided(parser);
-            pm_token_t closing = not_provided(parser);
-            pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
-
+            pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL);
             pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
-            pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
+            pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
 
-            return (pm_node_t *) symbol;
+            return UP(symbol);
         }
         case PM_TOKEN_SYMBOL_BEGIN: {
             pm_lex_mode_t lex_mode = *parser->lex_modes.current;
@@ -16435,16 +16055,16 @@ parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
         }
         case PM_TOKEN_BACK_REFERENCE:
             parser_lex(parser);
-            return (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
+            return UP(pm_back_reference_read_node_create(parser, &parser->previous));
         case PM_TOKEN_NUMBERED_REFERENCE:
             parser_lex(parser);
-            return (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
+            return UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
         case PM_TOKEN_GLOBAL_VARIABLE:
             parser_lex(parser);
-            return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
+            return UP(pm_global_variable_read_node_create(parser, &parser->previous));
         default:
             pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
-            return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+            return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
     }
 }
 
@@ -16456,10 +16076,10 @@ static pm_node_t *
 parse_variable(pm_parser_t *parser) {
     pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
     int depth;
-    bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
+    bool is_numbered_param = pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
 
     if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
-        return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
+        return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false));
     }
 
     pm_scope_t *current_scope = parser->current_scope;
@@ -16478,13 +16098,13 @@ parse_variable(pm_parser_t *parser) {
                 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
             }
 
-            pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
-            pm_node_list_append(&current_scope->implicit_parameters, node);
+            pm_node_t *node = UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false));
+            pm_node_list_append(parser->arena, &current_scope->implicit_parameters, node);
 
             return node;
-        } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
-            pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
-            pm_node_list_append(&current_scope->implicit_parameters, node);
+        } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
+            pm_node_t *node = UP(pm_it_local_variable_read_node_create(parser, &parser->previous));
+            pm_node_list_append(parser->arena, &current_scope->implicit_parameters, node);
 
             return node;
         }
@@ -16507,9 +16127,9 @@ parse_variable_call(pm_parser_t *parser) {
     }
 
     pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
-    pm_node_flag_set((pm_node_t *)node, flags);
+    pm_node_flag_set(UP(node), flags);
 
-    return (pm_node_t *) node;
+    return UP(node);
 }
 
 /**
@@ -16517,7 +16137,7 @@ parse_variable_call(pm_parser_t *parser) {
  * parser. If it does not match a valid method definition name, then a missing
  * token is returned.
  */
-static inline pm_token_t
+static PRISM_INLINE pm_token_t
 parse_method_definition_name(pm_parser_t *parser) {
     switch (parser->current.type) {
         case PM_CASE_KEYWORD:
@@ -16526,7 +16146,7 @@ parse_method_definition_name(pm_parser_t *parser) {
             parser_lex(parser);
             return parser->previous;
         case PM_TOKEN_IDENTIFIER:
-            pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
+            pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current));
             parser_lex(parser);
             return parser->previous;
         case PM_CASE_OPERATOR:
@@ -16534,22 +16154,31 @@ parse_method_definition_name(pm_parser_t *parser) {
             parser_lex(parser);
             return parser->previous;
         default:
-            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
-            return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_NAME, pm_token_str(parser->current.type));
+            return (pm_token_t) { .type = 0, .start = parser->current.start, .end = parser->current.end };
     }
 }
 
 static void
-parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
-    // Get a reference to the string struct that is being held by the string
-    // node. This is the value we're going to actually manipulate.
-    pm_string_ensure_owned(string);
+parse_heredoc_dedent_string(pm_arena_t *arena, pm_string_t *string, size_t common_whitespace) {
+    // Make a writable copy in the arena if the string isn't already writable.
+    // We keep a mutable pointer to the arena memory so we can memmove into it
+    // below without casting away const from the string's source field.
+    uint8_t *writable;
+
+    if (string->type != PM_STRING_OWNED) {
+        size_t length = pm_string_length(string);
+        writable = (uint8_t *) pm_arena_memdup(arena, pm_string_source(string), length, PRISM_ALIGNOF(uint8_t));
+        pm_string_constant_init(string, (const char *) writable, length);
+    } else {
+        writable = (uint8_t *) string->source;
+    }
 
     // Now get the bounds of the existing string. We'll use this as a
     // destination to move bytes into. We'll also use it for bounds checking
     // since we don't require that these strings be null terminated.
     size_t dest_length = pm_string_length(string);
-    const uint8_t *source_cursor = (uint8_t *) string->source;
+    const uint8_t *source_cursor = writable;
     const uint8_t *source_end = source_cursor + dest_length;
 
     // We're going to move bytes backward in the string when we get leading
@@ -16573,11 +16202,24 @@ parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
         dest_length--;
     }
 
-    memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
+    memmove(writable, source_cursor, (size_t) (source_end - source_cursor));
     string->length = dest_length;
 }
 
 /**
+ * Returns true when a heredoc string node should be dropped from its list: its
+ * unescaped value is empty and its source content contains no backslash.
+ */
+static PRISM_INLINE bool
+heredoc_dedent_discard_string_node(pm_parser_t *parser, pm_string_node_t *string_node) {
+    if (string_node->unescaped.length != 0) return false;
+
+    const uint8_t *content = parser->start + PM_LOCATION_START(&string_node->content_loc);
+    const void *backslash = pm_memchr(content, '\\', string_node->content_loc.length, parser->encoding_changed, parser->encoding);
+    return backslash == NULL;
+}
+
+/**
  * Take a heredoc node that is indented by a ~ and trim the leading whitespace.
  */
 static void
@@ -16587,8 +16229,7 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
     bool dedent_next = true;
 
     // Iterate over all nodes, and trim whitespace accordingly. We're going to
-    // keep around two indices: a read and a write. If we end up trimming all of
-    // the whitespace from a node, then we'll drop it from the list entirely.
+    // keep around two indices: a read and a write.
     size_t write_index = 0;
 
     pm_node_t *node;
@@ -16604,11 +16245,10 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
 
         pm_string_node_t *string_node = ((pm_string_node_t *) node);
         if (dedent_next) {
-            parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
+            parse_heredoc_dedent_string(parser->arena, &string_node->unescaped, common_whitespace);
         }
 
-        if (string_node->unescaped.length == 0) {
-            pm_node_destroy(parser, node);
+        if (heredoc_dedent_discard_string_node(parser, string_node)) {
         } else {
             nodes->nodes[write_index++] = node;
         }
@@ -16631,7 +16271,7 @@ parse_strings_empty_content(const uint8_t *location) {
 /**
  * Parse a set of strings that could be concatenated together.
  */
-static inline pm_node_t *
+static PRISM_INLINE pm_node_t *
 parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
     assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
     bool concating = false;
@@ -16658,16 +16298,14 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
             pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
 
             pm_string_shared_init(&string->unescaped, content.start, content.end);
-            node = (pm_node_t *) string;
+            node = UP(string);
         } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
             // If we get here, then we have an end of a label immediately
             // after a start. In that case we'll create an empty symbol
             // node.
-            pm_token_t content = parse_strings_empty_content(parser->previous.start);
-            pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
-
-            pm_string_shared_init(&symbol->unescaped, content.start, content.end);
-            node = (pm_node_t *) symbol;
+            pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, NULL, &parser->previous);
+            pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.start);
+            node = UP(symbol);
 
             if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
         } else if (!lex_interpolation) {
@@ -16678,7 +16316,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
 
             if (match1(parser, PM_TOKEN_EOF)) {
                 unescaped = PM_STRING_EMPTY;
-                content = not_provided(parser);
+                content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start, .end = parser->start };
             } else {
                 unescaped = parser->current_string;
                 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
@@ -16698,34 +16336,30 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
             // be able to contain all of the parts.
             if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
                 pm_node_list_t parts = { 0 };
-
-                pm_token_t delimiters = not_provided(parser);
-                pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
-                pm_node_list_append(&parts, part);
+                pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
+                pm_node_list_append(parser->arena, &parts, part);
 
                 do {
-                    part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
-                    pm_node_list_append(&parts, part);
+                    part = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+                    pm_node_list_append(parser->arena, &parts, part);
                     parser_lex(parser);
                 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
 
                 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
-                node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
-
-                pm_node_list_free(&parts);
+                node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
             } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
-                node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
+                node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
                 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
             } else if (match1(parser, PM_TOKEN_EOF)) {
                 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
-                node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
+                node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
             } else if (accept1(parser, PM_TOKEN_STRING_END)) {
-                node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
+                node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
             } else {
-                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_str(parser->previous.type));
                 parser->previous.start = parser->previous.end;
-                parser->previous.type = PM_TOKEN_MISSING;
-                node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
+                parser->previous.type = 0;
+                node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
             }
         } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
             // In this case we've hit string content so we know the string
@@ -16737,7 +16371,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
             parser_lex(parser);
 
             if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
-                node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
+                node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
                 pm_node_flag_set(node, parse_unescaped_encoding(parser));
 
                 // Kind of odd behavior, but basically if we have an
@@ -16747,43 +16381,38 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
                 if (!accept1(parser, PM_TOKEN_STRING_END)) {
                     const uint8_t *location = parser->previous.end;
                     if (location > parser->start && location[-1] == '\n') location--;
-                    pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
+                    pm_parser_err(parser, U32(location - parser->start), 0, PM_ERR_STRING_LITERAL_EOF);
 
                     parser->previous.start = parser->previous.end;
-                    parser->previous.type = PM_TOKEN_MISSING;
+                    parser->previous.type = 0;
                 }
             } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
-                node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
+                node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
                 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
             } else {
                 // If we get here, then we have interpolation so we'll need
                 // to create a string or symbol node with interpolation.
                 pm_node_list_t parts = { 0 };
-                pm_token_t string_opening = not_provided(parser);
-                pm_token_t string_closing = not_provided(parser);
-
-                pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
+                pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
                 pm_node_flag_set(part, parse_unescaped_encoding(parser));
-                pm_node_list_append(&parts, part);
+                pm_node_list_append(parser->arena, &parts, part);
 
                 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
                     if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
-                        pm_node_list_append(&parts, part);
+                        pm_node_list_append(parser->arena, &parts, part);
                     }
                 }
 
                 if (accept1(parser, PM_TOKEN_LABEL_END)) {
-                    node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
+                    node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
                     if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
                 } else if (match1(parser, PM_TOKEN_EOF)) {
                     pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
-                    node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
+                    node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
                 } else {
                     expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
-                    node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
+                    node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
                 }
-
-                pm_node_list_free(&parts);
             }
         } else {
             // If we get here, then the first part of the string is not plain
@@ -16794,22 +16423,20 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
 
             while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
                 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
-                    pm_node_list_append(&parts, part);
+                    pm_node_list_append(parser->arena, &parts, part);
                 }
             }
 
             if (accept1(parser, PM_TOKEN_LABEL_END)) {
-                node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
+                node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
                 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
             } else if (match1(parser, PM_TOKEN_EOF)) {
                 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
-                node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
+                node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
             } else {
                 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
-                node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
+                node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
             }
-
-            pm_node_list_free(&parts);
         }
 
         if (current == NULL) {
@@ -16839,14 +16466,12 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
                 }
 
                 concating = true;
-                pm_token_t bounds = not_provided(parser);
-
-                pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
-                pm_interpolated_string_node_append(container, current);
-                current = (pm_node_t *) container;
+                pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
+                pm_interpolated_string_node_append(parser, container, current);
+                current = UP(container);
             }
 
-            pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
+            pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, node);
         }
     }
 
@@ -16868,12 +16493,12 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
 static void
 parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
     // Skip this capture if it starts with an underscore.
-    if (*location->start == '_') return;
+    if (peek_at(parser, parser->start + location->start) == '_') return;
 
     if (pm_constant_id_list_includes(captures, capture)) {
-        pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
+        pm_parser_err(parser, location->start, location->length, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
     } else {
-        pm_constant_id_list_append(captures, capture);
+        pm_constant_id_list_append(parser->arena, captures, capture);
     }
 }
 
@@ -16887,7 +16512,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
     while (accept1(parser, PM_TOKEN_COLON_COLON)) {
         pm_token_t delimiter = parser->previous;
         expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
-        node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
+        node = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
     }
 
     // If there is a [ or ( that follows, then this is part of a larger pattern
@@ -16908,7 +16533,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
         if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
             inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
             accept1(parser, PM_TOKEN_NEWLINE);
-            expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
+            expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
         }
 
         closing = parser->previous;
@@ -16920,7 +16545,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
         if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
             inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
             accept1(parser, PM_TOKEN_NEWLINE);
-            expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
+            expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
         }
 
         closing = parser->previous;
@@ -16929,7 +16554,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
     if (!inner) {
         // If there was no inner pattern, then we have something like Foo() or
         // Foo[]. In that case we'll create an array pattern with no requireds.
-        return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
+        return UP(pm_array_pattern_node_constant_create(parser, node, &opening, &closing));
     }
 
     // Now that we have the inner pattern, check to see if it's an array, find,
@@ -16940,15 +16565,15 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
         case PM_ARRAY_PATTERN_NODE: {
             pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
 
-            if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
-                pattern_node->base.location.start = node->location.start;
-                pattern_node->base.location.end = closing.end;
+            if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
+                PM_NODE_START_SET_NODE(pattern_node, node);
+                PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
 
                 pattern_node->constant = node;
-                pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
-                pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+                pattern_node->opening_loc = TOK2LOC(parser, &opening);
+                pattern_node->closing_loc = TOK2LOC(parser, &closing);
 
-                return (pm_node_t *) pattern_node;
+                return UP(pattern_node);
             }
 
             break;
@@ -16956,15 +16581,15 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
         case PM_FIND_PATTERN_NODE: {
             pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
 
-            if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
-                pattern_node->base.location.start = node->location.start;
-                pattern_node->base.location.end = closing.end;
+            if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
+                PM_NODE_START_SET_NODE(pattern_node, node);
+                PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
 
                 pattern_node->constant = node;
-                pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
-                pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+                pattern_node->opening_loc = TOK2LOC(parser, &opening);
+                pattern_node->closing_loc = TOK2LOC(parser, &closing);
 
-                return (pm_node_t *) pattern_node;
+                return UP(pattern_node);
             }
 
             break;
@@ -16972,15 +16597,15 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
         case PM_HASH_PATTERN_NODE: {
             pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
 
-            if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
-                pattern_node->base.location.start = node->location.start;
-                pattern_node->base.location.end = closing.end;
+            if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
+                PM_NODE_START_SET_NODE(pattern_node, node);
+                PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
 
                 pattern_node->constant = node;
-                pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
-                pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+                pattern_node->opening_loc = TOK2LOC(parser, &opening);
+                pattern_node->closing_loc = TOK2LOC(parser, &closing);
 
-                return (pm_node_t *) pattern_node;
+                return UP(pattern_node);
             }
 
             break;
@@ -16993,8 +16618,8 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
     // attaching its constant. In this case we'll create an array pattern and
     // attach our constant to it.
     pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
-    pm_array_pattern_node_requireds_append(pattern_node, inner);
-    return (pm_node_t *) pattern_node;
+    pm_array_pattern_node_requireds_append(parser->arena, pattern_node, inner);
+    return UP(pattern_node);
 }
 
 /**
@@ -17010,21 +16635,20 @@ parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
     // will check for that here. If they do, then we'll add it to the local
     // table since this pattern will cause it to become a local variable.
     if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
-        pm_token_t identifier = parser->previous;
-        pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
+        pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
 
         int depth;
         if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
-            pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
+            pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
         }
 
-        parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
-        name = (pm_node_t *) pm_local_variable_target_node_create(
+        parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
+        name = UP(pm_local_variable_target_node_create(
             parser,
-            &PM_LOCATION_TOKEN_VALUE(&identifier),
+            &TOK2LOC(parser, &parser->previous),
             constant_id,
             (uint32_t) (depth == -1 ? 0 : depth)
-        );
+        ));
     }
 
     // Finally we can return the created node.
@@ -17043,7 +16667,7 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
     pm_node_t *value = NULL;
 
     if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
-        return (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
+        return UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
     }
 
     if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
@@ -17054,16 +16678,16 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
             pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
         }
 
-        parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
-        value = (pm_node_t *) pm_local_variable_target_node_create(
+        parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
+        value = UP(pm_local_variable_target_node_create(
             parser,
-            &PM_LOCATION_TOKEN_VALUE(&parser->previous),
+            &TOK2LOC(parser, &parser->previous),
             constant_id,
             (uint32_t) (depth == -1 ? 0 : depth)
-        );
+        ));
     }
 
-    return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
+    return UP(pm_assoc_splat_node_create(parser, value, &operator));
 }
 
 /**
@@ -17100,22 +16724,24 @@ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const u
 static pm_node_t *
 parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
     const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
+    const uint8_t *start = parser->start + PM_LOCATION_START(value_loc);
+    const uint8_t *end = parser->start + PM_LOCATION_END(value_loc);
 
-    pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
+    pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
     int depth = -1;
 
-    if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
+    if (pm_slice_is_valid_local(parser, start, end)) {
         depth = pm_parser_local_depth_constant_id(parser, constant_id);
     } else {
-        pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
+        pm_parser_err(parser, PM_NODE_START(key), PM_NODE_LENGTH(key), PM_ERR_PATTERN_HASH_KEY_LOCALS);
 
-        if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
-            PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
+        if ((end > start) && ((end[-1] == '!') || (end[-1] == '?'))) {
+            PM_PARSER_ERR_FORMAT(parser, value_loc->start, value_loc->length, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (end - start), (const char *) start);
         }
     }
 
     if (depth == -1) {
-        pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
+        pm_parser_local_add(parser, constant_id, start, end, 0);
     }
 
     parse_pattern_capture(parser, captures, constant_id, value_loc);
@@ -17126,7 +16752,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
         (uint32_t) (depth == -1 ? 0 : depth)
     );
 
-    return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
+    return UP(pm_implicit_node_create(parser, UP(target)));
 }
 
 /**
@@ -17135,7 +16761,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
  */
 static void
 parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
-    if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
+    if (pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, keys, node, true) != NULL) {
         pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
     }
 }
@@ -17154,25 +16780,31 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
         case PM_NO_KEYWORDS_PARAMETER_NODE:
             rest = first_node;
             break;
+        case PM_INTERPOLATED_SYMBOL_NODE:
         case PM_SYMBOL_NODE: {
-            if (pm_symbol_node_label_p(first_node)) {
-                parse_pattern_hash_key(parser, &keys, first_node);
+            if (pm_symbol_node_label_p(parser, first_node)) {
+                if (PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE)) {
+                    pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
+                } else {
+                    parse_pattern_hash_key(parser, &keys, first_node);
+                }
+
                 pm_node_t *value;
 
                 if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
-                    // Otherwise, we will create an implicit local variable
-                    // target for the value.
-                    value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
+                    if (PM_NODE_TYPE_P(first_node, PM_SYMBOL_NODE)) {
+                        value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
+                    } else {
+                        value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(first_node), 0));
+                    }
                 } else {
                     // Here we have a value for the first assoc in the list, so
                     // we will parse it now.
                     value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
                 }
 
-                pm_token_t operator = not_provided(parser);
-                pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
-
-                pm_node_list_append(&assocs, assoc);
+                pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
+                pm_node_list_append(parser->arena, &assocs, assoc);
                 break;
             }
         }
@@ -17184,11 +16816,10 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
             pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
             pm_parser_err_node(parser, first_node, diag_id);
 
-            pm_token_t operator = not_provided(parser);
-            pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
-            pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
+            pm_node_t *value = UP(pm_error_recovery_node_create(parser, PM_NODE_START(first_node), PM_NODE_LENGTH(first_node)));
+            pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
 
-            pm_node_list_append(&assocs, assoc);
+            pm_node_list_append(parser->arena, &assocs, assoc);
             break;
         }
     }
@@ -17212,7 +16843,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
                 rest = assoc;
             } else {
                 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
-                pm_node_list_append(&assocs, assoc);
+                pm_node_list_append(parser->arena, &assocs, assoc);
             }
         } else {
             pm_node_t *key;
@@ -17222,36 +16853,43 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
 
                 if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
                     pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
-                } else if (!pm_symbol_node_label_p(key)) {
+                } else if (!pm_symbol_node_label_p(parser, key)) {
                     pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
                 }
+            } else if (accept1(parser, PM_TOKEN_LABEL)) {
+                key = UP(pm_symbol_node_label_create(parser, &parser->previous));
             } else {
                 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
-                key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
+
+                pm_token_t label = { .type = PM_TOKEN_LABEL, .start = parser->previous.end, .end = parser->previous.end };
+                key = UP(pm_symbol_node_create(parser, NULL, &label, NULL));
             }
 
             parse_pattern_hash_key(parser, &keys, key);
             pm_node_t *value = NULL;
 
             if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
-                value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
+                if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
+                    value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
+                } else {
+                    value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(key), 0));
+                }
             } else {
                 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
             }
 
-            pm_token_t operator = not_provided(parser);
-            pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
+            pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, NULL, value));
 
             if (rest != NULL) {
                 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
             }
 
-            pm_node_list_append(&assocs, assoc);
+            pm_node_list_append(parser->arena, &assocs, assoc);
         }
     }
 
     pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
-    xfree(assocs.nodes);
+    // assocs.nodes is arena-allocated; no explicit free needed.
 
     pm_static_literals_free(&keys);
     return node;
@@ -17273,13 +16911,13 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
                 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
             }
 
-            parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
-            return (pm_node_t *) pm_local_variable_target_node_create(
+            parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
+            return UP(pm_local_variable_target_node_create(
                 parser,
-                &PM_LOCATION_TOKEN_VALUE(&parser->previous),
+                &TOK2LOC(parser, &parser->previous),
                 constant_id,
                 (uint32_t) (depth == -1 ? 0 : depth)
-            );
+            ));
         }
         case PM_TOKEN_BRACKET_LEFT_ARRAY: {
             pm_token_t opening = parser->current;
@@ -17288,7 +16926,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
             if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
                 // If we have an empty array pattern, then we'll just return a new
                 // array pattern node.
-                return (pm_node_t *) pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
+                return UP(pm_array_pattern_node_empty_create(parser, &opening, &parser->previous));
             }
 
             // Otherwise, we'll parse the inner pattern, then deal with it depending
@@ -17296,34 +16934,34 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
             pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
 
             accept1(parser, PM_TOKEN_NEWLINE);
-            expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
+            expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
             pm_token_t closing = parser->previous;
 
             switch (PM_NODE_TYPE(inner)) {
                 case PM_ARRAY_PATTERN_NODE: {
                     pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
-                    if (pattern_node->opening_loc.start == NULL) {
-                        pattern_node->base.location.start = opening.start;
-                        pattern_node->base.location.end = closing.end;
+                    if (pattern_node->opening_loc.length == 0) {
+                        PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
+                        PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
 
-                        pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
-                        pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+                        pattern_node->opening_loc = TOK2LOC(parser, &opening);
+                        pattern_node->closing_loc = TOK2LOC(parser, &closing);
 
-                        return (pm_node_t *) pattern_node;
+                        return UP(pattern_node);
                     }
 
                     break;
                 }
                 case PM_FIND_PATTERN_NODE: {
                     pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
-                    if (pattern_node->opening_loc.start == NULL) {
-                        pattern_node->base.location.start = opening.start;
-                        pattern_node->base.location.end = closing.end;
+                    if (pattern_node->opening_loc.length == 0) {
+                        PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
+                        PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
 
-                        pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
-                        pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+                        pattern_node->opening_loc = TOK2LOC(parser, &opening);
+                        pattern_node->closing_loc = TOK2LOC(parser, &closing);
 
-                        return (pm_node_t *) pattern_node;
+                        return UP(pattern_node);
                     }
 
                     break;
@@ -17333,8 +16971,8 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
             }
 
             pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
-            pm_array_pattern_node_requireds_append(node, inner);
-            return (pm_node_t *) node;
+            pm_array_pattern_node_requireds_append(parser->arena, node, inner);
+            return UP(node);
         }
         case PM_TOKEN_BRACE_LEFT: {
             bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
@@ -17354,19 +16992,19 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
                 switch (parser->current.type) {
                     case PM_TOKEN_LABEL:
                         parser_lex(parser);
-                        first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
+                        first_node = UP(pm_symbol_node_label_create(parser, &parser->previous));
                         break;
                     case PM_TOKEN_USTAR_STAR:
                         first_node = parse_pattern_keyword_rest(parser, captures);
                         break;
                     case PM_TOKEN_STRING_BEGIN:
-                        first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
+                        first_node = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_LABEL, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
                         break;
                     default: {
-                        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_str(parser->current.type));
                         parser_lex(parser);
 
-                        first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+                        first_node = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
                         break;
                     }
                 }
@@ -17374,18 +17012,18 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
                 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
 
                 accept1(parser, PM_TOKEN_NEWLINE);
-                expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
+                expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE, &opening);
                 pm_token_t closing = parser->previous;
 
-                node->base.location.start = opening.start;
-                node->base.location.end = closing.end;
+                PM_NODE_START_SET_TOKEN(parser, node, &opening);
+                PM_NODE_LENGTH_SET_TOKEN(parser, node, &closing);
 
-                node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
-                node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+                node->opening_loc = TOK2LOC(parser, &opening);
+                node->closing_loc = TOK2LOC(parser, &closing);
             }
 
             parser->pattern_matching_newlines = previous_pattern_matching_newlines;
-            return (pm_node_t *) node;
+            return UP(node);
         }
         case PM_TOKEN_UDOT_DOT:
         case PM_TOKEN_UDOT_DOT_DOT: {
@@ -17396,21 +17034,27 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
             // expression as the right side of the range.
             switch (parser->current.type) {
                 case PM_CASE_PRIMITIVE: {
-                    pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
-                    return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
+                    pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
+                    return UP(pm_range_node_create(parser, NULL, &operator, right));
                 }
                 default: {
                     pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
-                    pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
-                    return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
+                    pm_node_t *right = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator)));
+                    return UP(pm_range_node_create(parser, NULL, &operator, right));
                 }
             }
         }
         case PM_CASE_PRIMITIVE: {
-            pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
+            pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_LABEL | PM_PARSE_ACCEPTS_DO_BLOCK, diag_id, (uint16_t) (depth + 1));
 
             // If we found a label, we need to immediately return to the caller.
-            if (pm_symbol_node_label_p(node)) return node;
+            if (pm_symbol_node_label_p(parser, node)) return node;
+
+            // Call nodes (arithmetic operations) are not allowed in patterns
+            if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
+                pm_parser_err_node(parser, node, diag_id);
+                return UP(pm_error_recovery_node_create_unexpected(parser, node));
+            }
 
             // Now that we have a primitive, we need to check if it's part of a range.
             if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
@@ -17421,11 +17065,11 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
                 // node. Otherwise, we'll create an endless range.
                 switch (parser->current.type) {
                     case PM_CASE_PRIMITIVE: {
-                        pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
-                        return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
+                        pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
+                        return UP(pm_range_node_create(parser, node, &operator, right));
                     }
                     default:
-                        return (pm_node_t *) pm_range_node_create(parser, node, &operator, NULL);
+                        return UP(pm_range_node_create(parser, node, &operator, NULL));
                 }
             }
 
@@ -17440,44 +17084,44 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
             switch (parser->current.type) {
                 case PM_TOKEN_IDENTIFIER: {
                     parser_lex(parser);
-                    pm_node_t *variable = (pm_node_t *) parse_variable(parser);
+                    pm_node_t *variable = UP(parse_variable(parser));
 
                     if (variable == NULL) {
-                        PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
-                        variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
+                        PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
+                        variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->previous, 0));
                     }
 
-                    return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+                    return UP(pm_pinned_variable_node_create(parser, &operator, variable));
                 }
                 case PM_TOKEN_INSTANCE_VARIABLE: {
                     parser_lex(parser);
-                    pm_node_t *variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
+                    pm_node_t *variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
 
-                    return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+                    return UP(pm_pinned_variable_node_create(parser, &operator, variable));
                 }
                 case PM_TOKEN_CLASS_VARIABLE: {
                     parser_lex(parser);
-                    pm_node_t *variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
+                    pm_node_t *variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
 
-                    return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+                    return UP(pm_pinned_variable_node_create(parser, &operator, variable));
                 }
                 case PM_TOKEN_GLOBAL_VARIABLE: {
                     parser_lex(parser);
-                    pm_node_t *variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
+                    pm_node_t *variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
 
-                    return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+                    return UP(pm_pinned_variable_node_create(parser, &operator, variable));
                 }
                 case PM_TOKEN_NUMBERED_REFERENCE: {
                     parser_lex(parser);
-                    pm_node_t *variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
+                    pm_node_t *variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
 
-                    return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+                    return UP(pm_pinned_variable_node_create(parser, &operator, variable));
                 }
                 case PM_TOKEN_BACK_REFERENCE: {
                     parser_lex(parser);
-                    pm_node_t *variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
+                    pm_node_t *variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
 
-                    return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+                    return UP(pm_pinned_variable_node_create(parser, &operator, variable));
                 }
                 case PM_TOKEN_PARENTHESIS_LEFT: {
                     bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
@@ -17486,19 +17130,19 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
                     pm_token_t lparen = parser->current;
                     parser_lex(parser);
 
-                    pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
+                    pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
                     parser->pattern_matching_newlines = previous_pattern_matching_newlines;
 
                     accept1(parser, PM_TOKEN_NEWLINE);
-                    expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
-                    return (pm_node_t *) pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous);
+                    expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &lparen);
+                    return UP(pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous));
                 }
                 default: {
                     // If we get here, then we have a pin operator followed by something
-                    // not understood. We'll create a missing node and return that.
+                    // not understood. We'll create an error recovery node and return that.
                     pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
-                    pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
-                    return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+                    pm_node_t *variable = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator)));
+                    return UP(pm_pinned_variable_node_create(parser, &operator, variable));
                 }
             }
         }
@@ -17509,31 +17153,56 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
             expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
             pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
 
-            return parse_pattern_constant_path(parser, captures, (pm_node_t *) node, (uint16_t) (depth + 1));
+            return parse_pattern_constant_path(parser, captures, UP(node), (uint16_t) (depth + 1));
         }
         case PM_TOKEN_CONSTANT: {
             pm_token_t constant = parser->current;
             parser_lex(parser);
 
-            pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
+            pm_node_t *node = UP(pm_constant_read_node_create(parser, &constant));
             return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
         }
         default:
             pm_parser_err_current(parser, diag_id);
-            return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+            return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
+    }
+}
+
+static bool
+parse_pattern_alternation_error_each(const pm_node_t *node, void *data) { // visitor callback handed to pm_visit_node; data is the pm_parser_t
+    switch (PM_NODE_TYPE(node)) {
+        case PM_LOCAL_VARIABLE_TARGET_NODE: { // a capture target: report it against the node's own span
+            pm_parser_t *parser = (pm_parser_t *) data;
+            pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE);
+            return false; // NOTE(review): presumably tells pm_visit_node not to descend into this node — confirm
+        }
+        default:
+            return true; // NOTE(review): presumably asks pm_visit_node to keep visiting children — confirm
     }
 }
 
 /**
+ * Callers reach this only after recording a capture within an alternation, so a
+ * syntax error is certain: visit node and report each local variable target.
+ */
+static void
+parse_pattern_alternation_error(pm_parser_t *parser, const pm_node_t *node) {
+    pm_visit_node(node, parse_pattern_alternation_error_each, parser); // parser travels as the callback's data pointer
+}
+
+/**
  * Parse any number of primitives joined by alternation and ended optionally by
  * assignment.
  */
 static pm_node_t *
 parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
     pm_node_t *node = first_node;
+    bool alternation = false;
 
-    while ((node == NULL) || accept1(parser, PM_TOKEN_PIPE)) {
-        pm_token_t operator = parser->previous;
+    while ((node == NULL) || (alternation = accept1(parser, PM_TOKEN_PIPE))) {
+        if (alternation && !PM_NODE_TYPE_P(node, PM_ALTERNATION_PATTERN_NODE) && captures->size) {
+            parse_pattern_alternation_error(parser, node);
+        }
 
         switch (parser->current.type) {
             case PM_TOKEN_IDENTIFIER:
@@ -17545,41 +17214,47 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
             case PM_TOKEN_UDOT_DOT:
             case PM_TOKEN_UDOT_DOT_DOT:
             case PM_CASE_PRIMITIVE: {
-                if (node == NULL) {
+                if (!alternation) {
                     node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
                 } else {
+                    pm_token_t operator = parser->previous;
                     pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
-                    node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
+
+                    if (captures->size) parse_pattern_alternation_error(parser, right);
+                    node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
                 }
 
                 break;
             }
             case PM_TOKEN_PARENTHESIS_LEFT:
             case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
+                pm_token_t operator = parser->previous;
                 pm_token_t opening = parser->current;
                 parser_lex(parser);
 
                 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
                 accept1(parser, PM_TOKEN_NEWLINE);
-                expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
-                pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0);
+                expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
+                pm_node_t *right = UP(pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0));
 
-                if (node == NULL) {
+                if (!alternation) {
                     node = right;
                 } else {
-                    node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
+                    if (captures->size) parse_pattern_alternation_error(parser, right);
+                    node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
                 }
 
                 break;
             }
             default: {
                 pm_parser_err_current(parser, diag_id);
-                pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+                pm_node_t *right = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
 
-                if (node == NULL) {
+                if (!alternation) {
                     node = right;
                 } else {
-                    node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
+                    if (captures->size) parse_pattern_alternation_error(parser, right);
+                    node = UP(pm_alternation_pattern_node_create(parser, node, right, &parser->previous));
                 }
 
                 break;
@@ -17600,15 +17275,15 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
             pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
         }
 
-        parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
+        parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
         pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
             parser,
-            &PM_LOCATION_TOKEN_VALUE(&parser->previous),
+            &TOK2LOC(parser, &parser->previous),
             constant_id,
             (uint32_t) (depth == -1 ? 0 : depth)
         );
 
-        node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
+        node = UP(pm_capture_pattern_node_create(parser, node, target, &operator));
     }
 
     return node;
@@ -17627,8 +17302,8 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
     switch (parser->current.type) {
         case PM_TOKEN_LABEL: {
             parser_lex(parser);
-            pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
-            node = (pm_node_t *) parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1));
+            pm_node_t *key = UP(pm_symbol_node_label_create(parser, &parser->previous));
+            node = UP(parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1)));
 
             if (!(flags & PM_PARSE_PATTERN_TOP)) {
                 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
@@ -17638,7 +17313,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
         }
         case PM_TOKEN_USTAR_STAR: {
             node = parse_pattern_keyword_rest(parser, captures);
-            node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
+            node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
 
             if (!(flags & PM_PARSE_PATTERN_TOP)) {
                 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
@@ -17651,8 +17326,8 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
             // be dynamic symbols leading to hash patterns.
             node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
 
-            if (pm_symbol_node_label_p(node)) {
-                node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
+            if (pm_symbol_node_label_p(parser, node)) {
+                node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
 
                 if (!(flags & PM_PARSE_PATTERN_TOP)) {
                     pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
@@ -17667,7 +17342,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
         case PM_TOKEN_USTAR: {
             if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
                 parser_lex(parser);
-                node = (pm_node_t *) parse_pattern_rest(parser, captures);
+                node = UP(parse_pattern_rest(parser, captures));
                 leading_rest = true;
                 break;
             }
@@ -17680,8 +17355,8 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
 
     // If we got a dynamic label symbol, then we need to treat it like the
     // beginning of a hash pattern.
-    if (pm_symbol_node_label_p(node)) {
-        return (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
+    if (pm_symbol_node_label_p(parser, node)) {
+        return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
     }
 
     if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
@@ -17689,20 +17364,20 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
         // or a find pattern. We need to parse all of the patterns, put them
         // into a big list, and then determine which type of node we have.
         pm_node_list_t nodes = { 0 };
-        pm_node_list_append(&nodes, node);
+        pm_node_list_append(parser->arena, &nodes, node);
 
         // Gather up all of the patterns into the list.
         while (accept1(parser, PM_TOKEN_COMMA)) {
             // Break early here in case we have a trailing comma.
-            if (match9(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF,PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
-                node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
-                pm_node_list_append(&nodes, node);
+            if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
+                node = UP(pm_implicit_rest_node_create(parser, &parser->previous));
+                pm_node_list_append(parser->arena, &nodes, node);
                 trailing_rest = true;
                 break;
             }
 
             if (accept1(parser, PM_TOKEN_USTAR)) {
-                node = (pm_node_t *) parse_pattern_rest(parser, captures);
+                node = UP(parse_pattern_rest(parser, captures));
 
                 // If we have already parsed a splat pattern, then this is an
                 // error. We will continue to parse the rest of the patterns,
@@ -17716,7 +17391,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
                 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
             }
 
-            pm_node_list_append(&nodes, node);
+            pm_node_list_append(parser->arena, &nodes, node);
         }
 
         // If the first pattern and the last pattern are rest patterns, then we
@@ -17724,24 +17399,24 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
         // are in between because we know we already added the appropriate
         // errors. Otherwise we will create an array pattern.
         if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
-            node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
+            node = UP(pm_find_pattern_node_create(parser, &nodes));
 
             if (nodes.size == 2) {
                 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
             }
         } else {
-            node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
+            node = UP(pm_array_pattern_node_node_list_create(parser, &nodes));
 
             if (leading_rest && trailing_rest) {
                 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
             }
         }
 
-        xfree(nodes.nodes);
+        // nodes.nodes is arena-allocated; no explicit free needed.
     } else if (leading_rest) {
         // Otherwise, if we parsed a single splat pattern, then we know we have
         // an array pattern, so we can go ahead and create that node.
-        node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
+        node = UP(pm_array_pattern_node_rest_create(parser, node));
     }
 
     return node;
@@ -17752,29 +17427,33 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
  * from its start bounds. If it's a compound node, then we will recursively
  * apply this function to its value.
  */
-static inline void
+static PRISM_INLINE void
 parse_negative_numeric(pm_node_t *node) {
     switch (PM_NODE_TYPE(node)) {
         case PM_INTEGER_NODE: {
             pm_integer_node_t *cast = (pm_integer_node_t *) node;
             cast->base.location.start--;
+            cast->base.location.length++;
             cast->value.negative = true;
             break;
         }
         case PM_FLOAT_NODE: {
             pm_float_node_t *cast = (pm_float_node_t *) node;
             cast->base.location.start--;
+            cast->base.location.length++;
             cast->value = -cast->value;
             break;
         }
         case PM_RATIONAL_NODE: {
             pm_rational_node_t *cast = (pm_rational_node_t *) node;
             cast->base.location.start--;
+            cast->base.location.length++;
             cast->numerator.negative = true;
             break;
         }
         case PM_IMAGINARY_NODE:
             node->location.start--;
+            node->location.length++;
             parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
             break;
         default:
@@ -17792,22 +17471,22 @@ static void
 pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
     switch (diag_id) {
         case PM_ERR_HASH_KEY: {
-            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, pm_token_str(parser->previous.type));
             break;
         }
         case PM_ERR_HASH_VALUE:
         case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
-            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_str(parser->current.type));
             break;
         }
         case PM_ERR_UNARY_RECEIVER: {
-            const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
-            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
+            const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_str(parser->current.type));
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, human, parser->previous.start[0]);
             break;
         }
         case PM_ERR_UNARY_DISALLOWED:
         case PM_ERR_EXPECT_ARGUMENT: {
-            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_str(parser->current.type));
             break;
         }
         default:
@@ -17887,6 +17566,7 @@ parse_retry(pm_parser_t *parser, const pm_node_t *node) {
             case PM_CONTEXT_BEGIN:
             case PM_CONTEXT_BLOCK_BRACES:
             case PM_CONTEXT_BLOCK_KEYWORDS:
+            case PM_CONTEXT_BLOCK_PARAMETERS:
             case PM_CONTEXT_CASE_IN:
             case PM_CONTEXT_CASE_WHEN:
             case PM_CONTEXT_DEFAULT_PARAMS:
@@ -17967,6 +17647,7 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
             case PM_CONTEXT_BLOCK_KEYWORDS:
             case PM_CONTEXT_BLOCK_ELSE:
             case PM_CONTEXT_BLOCK_ENSURE:
+            case PM_CONTEXT_BLOCK_PARAMETERS:
             case PM_CONTEXT_BLOCK_RESCUE:
             case PM_CONTEXT_CASE_IN:
             case PM_CONTEXT_CASE_WHEN:
@@ -18003,67 +17684,1383 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
 }
 
 /**
- * This struct is used to pass information between the regular expression parser
- * and the error callback.
+ * Determine if a call node looks like a "command": no parentheses, no block
+ * besides a block argument node, and at least arguments or a block argument.
  */
-typedef struct {
-    /** The parser that we are parsing the regular expression for. */
-    pm_parser_t *parser;
+static PRISM_INLINE bool
+pm_call_node_command_p(const pm_call_node_t *node) {
+    return (
+        (node->opening_loc.length == 0) && // no opening delimiter was recorded
+        (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) && // at most a block argument
+        (node->arguments != NULL || node->block != NULL) // but something is being passed
+    );
+}
 
-    /** The start of the regular expression. */
-    const uint8_t *start;
+/**
+ * Returns true if the given node is a command-style call, super, or yield (no
+ * parentheses but arguments), or a `!`/`not`, rescue modifier, or `def ... =`
+ * wrapping one. Operator calls (e.g., a + b) look similar but are excluded.
+ */
+static bool
+pm_command_call_value_p(const pm_node_t *node) {
+    switch (PM_NODE_TYPE(node)) {
+        case PM_CALL_NODE: {
+            const pm_call_node_t *call = (const pm_call_node_t *) node;
 
-    /** The end of the regular expression. */
-    const uint8_t *end;
+            // Command-style calls (e.g., foo bar, obj.foo bar).
+            // Attribute writes (e.g., a.b = 1) are not commands.
+            if (pm_call_node_command_p(call) && !PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE) && (call->receiver == NULL || call->call_operator_loc.length > 0)) {
+                return true;
+            }
 
-    /**
-     * Whether or not the source of the regular expression is shared. This
-     * impacts the location of error messages, because if it is shared then we
-     * can use the location directly and if it is not, then we use the bounds of
-     * the regular expression itself.
-     */
-    bool shared;
-} parse_regular_expression_error_data_t;
+            // A `!` or `not` prefix wrapping a command call (e.g.,
+            // `!foo bar`, `not foo bar`) is also a command-call value.
+            if (call->receiver != NULL && call->arguments == NULL && call->opening_loc.length == 0 && call->call_operator_loc.length == 0) {
+                return pm_command_call_value_p(call->receiver);
+            }
+
+            return false;
+        }
+        case PM_SUPER_NODE: {
+            const pm_super_node_t *cast = (const pm_super_node_t *) node;
+            return cast->lparen_loc.length == 0 && (cast->arguments != NULL || cast->block != NULL); // no lparen, but arguments or a block
+        }
+        case PM_YIELD_NODE: {
+            const pm_yield_node_t *cast = (const pm_yield_node_t *) node;
+            return cast->lparen_loc.length == 0 && cast->arguments != NULL; // no lparen, but arguments
+        }
+        case PM_RESCUE_MODIFIER_NODE:
+            return pm_command_call_value_p(((const pm_rescue_modifier_node_t *) node)->expression); // judged by the rescued expression
+        case PM_DEF_NODE: {
+            const pm_def_node_t *cast = (const pm_def_node_t *) node;
+            if (cast->equal_loc.length > 0 && cast->body != NULL) { // only defs written with `=` that have a body
+                const pm_node_t *body = cast->body;
+                if (PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE)) { // NOTE(review): the index below assumes a non-empty statements list — confirm
+                    body = ((const pm_statements_node_t *) body)->body.nodes[((const pm_statements_node_t *) body)->body.size - 1];
+                }
+                return pm_command_call_value_p(body); // judged by the last statement, or by the body itself
+            }
+            return false;
+        }
+        default:
+            return false;
+    }
+}
 
 /**
- * This callback is called when the regular expression parser encounters a
- * syntax error.
+ * Returns true if the given node is a block call: a command
+ * with a do-block, or any call chained (via `.`, `::`, `&.`) from such a node.
+ * Block calls can only be followed by call chaining, composition (and/or), and
+ * modifier operators.
  */
-static void
-parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
-    parse_regular_expression_error_data_t *callback_data = (parse_regular_expression_error_data_t *) data;
-    pm_location_t location;
+static bool
+pm_block_call_p(const pm_node_t *node) {
+    while (PM_NODE_TYPE_P(node, PM_CALL_NODE)) { // iterate instead of recursing down the receiver chain
+        const pm_call_node_t *call = (const pm_call_node_t *) node;
+        if (call->opening_loc.length > 0) return false; // NOTE(review): opening_loc presumably marks parentheses; such a link ends the search — confirm
+
+        // Root: command with do-block (e.g., `foo bar do end`).
+        if (call->arguments != NULL && call->block != NULL && PM_NODE_TYPE_P(call->block, PM_BLOCK_NODE)) {
+            return true;
+        }
 
-    if (callback_data->shared) {
-        location = (pm_location_t) { .start = start, .end = end };
+        // Walk up the receiver chain (e.g., `foo bar do end.baz`).
+        if (call->call_operator_loc.length > 0 && call->receiver != NULL) {
+            node = call->receiver;
+            continue;
+        }
+
+        return false; // neither a do-block command nor chained from a receiver
+    }
+
+    return false; // the chain bottomed out on a node that is not a call
+}
+
+/**
+ * Parse a case expression (the `case` keyword), either case-when or case-in
+ * (pattern matching). Of flags, only PM_PARSE_ACCEPTS_DO_BLOCK is passed down.
+ */
+static pm_node_t *
+parse_case(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
+    size_t opening_newline_index = token_newline_index(parser);
+    parser_lex(parser);
+
+    pm_token_t case_keyword = parser->previous;
+    pm_node_t *predicate = NULL;
+
+    pm_node_list_t current_block_exits = { 0 };
+    pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+    if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+        while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
+        predicate = NULL;
+    } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
+        predicate = NULL;
+    } else if (!token_begins_expression_p(parser->current.type)) {
+        predicate = NULL;
+    } else {
+        predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
+        while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
+    }
+
+    if (match1(parser, PM_TOKEN_KEYWORD_END)) {
+        parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
+        parser_lex(parser);
+        pop_block_exits(parser, previous_block_exits);
+        pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
+        return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous));
+    }
+
+    /* At this point we can create a case node, though we don't yet know if it
+     * is a case-in or case-when node. */
+    pm_node_t *node;
+
+    if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
+        pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL);
+        pm_static_literals_t literals = { 0 };
+
+        /* At this point we've seen a when keyword, so we know this is a
+         * case-when node. We will continue to parse the when nodes until we hit
+         * the end of the list. */
+        while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
+            parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
+            parser_lex(parser);
+
+            pm_token_t when_keyword = parser->previous;
+            pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
+
+            do {
+                if (accept1(parser, PM_TOKEN_USTAR)) {
+                    pm_token_t operator = parser->previous;
+                    pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+
+                    pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
+                    pm_when_node_conditions_append(parser->arena, when_node, UP(splat_node));
+
+                    if (PM_NODE_TYPE_P(expression, PM_ERROR_RECOVERY_NODE)) break;
+                } else {
+                    pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
+                    pm_when_node_conditions_append(parser->arena, when_node, condition);
+
+                    /* If we got an error recovery node, then this is a syntax
+                     * error and we should stop looping. */
+                    if (PM_NODE_TYPE_P(condition, PM_ERROR_RECOVERY_NODE)) break;
+
+                    /* If this is a string node, then we need to mark it as
+                     * frozen because when clause strings are frozen. */
+                    if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
+                        pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
+                    } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
+                        pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
+                    }
+
+                    pm_when_clause_static_literals_add(parser, &literals, condition);
+                }
+            } while (accept1(parser, PM_TOKEN_COMMA));
+
+            if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+                if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
+                    pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
+                }
+            } else {
+                expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
+                pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
+            }
+
+            if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+                pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
+                if (statements != NULL) {
+                    pm_when_node_statements_set(when_node, statements);
+                }
+            }
+
+            pm_case_node_condition_append(parser->arena, case_node, UP(when_node));
+        }
+
+        /* If we didn't parse any conditions (in or when) then we need to
+         * indicate that we have an error. */
+        if (case_node->conditions.size == 0) {
+            pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
+        }
+
+        pm_static_literals_free(&literals);
+        node = UP(case_node);
+    } else {
+        pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate);
+
+        /* If this is a case-match node (i.e., it is a pattern matching case
+         * statement) then we must have a predicate. */
+        if (predicate == NULL) {
+            pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
+        }
+
+        /* At this point we expect that we're parsing a case-in node. We will
+         * continue to parse the in nodes until we hit the end of the list. */
+        while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
+            parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
+
+            bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
+            parser->pattern_matching_newlines = true;
+
+            lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
+            parser->command_start = false;
+            parser_lex(parser);
+
+            pm_token_t in_keyword = parser->previous;
+
+            pm_constant_id_list_t captures = { 0 };
+            pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
+
+            parser->pattern_matching_newlines = previous_pattern_matching_newlines;
+
+            /* Since we're in the top-level of the case-in node we need to
+             * check for guard clauses in the form of `if` or `unless`
+             * statements. */
+            if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
+                pm_token_t keyword = parser->previous;
+                pm_node_t *guard = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
+                pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, guard));
+            } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
+                pm_token_t keyword = parser->previous;
+                pm_node_t *guard = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
+                pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, guard));
+            }
+
+            /* Now we need to check for the terminator of the in node's pattern.
+             * It can be a newline or semicolon optionally followed by a `then`
+             * keyword. */
+            pm_token_t then_keyword = { 0 };
+            if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+                if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
+                    then_keyword = parser->previous;
+                }
+            } else {
+                expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
+                then_keyword = parser->previous;
+            }
+
+            /* Now we can actually parse the statements associated with the in
+             * node. */
+            pm_statements_node_t *statements;
+            if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+                statements = NULL;
+            } else {
+                statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
+            }
+
+            /* Now that we have the full pattern and statements, we can create
+             * the node and attach it to the case node. */
+            pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, NTOK2PTR(then_keyword)));
+            pm_case_match_node_condition_append(parser->arena, case_node, condition);
+        }
+
+        /* If we didn't parse any conditions (in or when) then we need to
+         * indicate that we have an error. */
+        if (case_node->conditions.size == 0) {
+            pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
+        }
+
+        node = UP(case_node);
+    }
+
+    accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+    if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
+        pm_token_t else_keyword = parser->previous;
+        pm_else_node_t *else_node;
+
+        if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
+            else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
+        } else {
+            else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
+        }
+
+        if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
+            pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
+        } else {
+            pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
+        }
+    }
+
+    parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
+    expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword);
+
+    if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
+        pm_case_node_end_keyword_loc_set(parser, (pm_case_node_t *) node, &parser->previous);
     } else {
-        location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
+        pm_case_match_node_end_keyword_loc_set(parser, (pm_case_match_node_t *) node, &parser->previous);
     }
 
-    PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
+    pop_block_exits(parser, previous_block_exits);
+    return node;
 }
 
 /**
- * Parse the errors for the regular expression and add them to the parser.
+ * Parse a class definition expression (the `class` keyword). This handles both
+ * regular class definitions and singleton class definitions (`class << expr`).
+ *
+ * The `class` keyword is consumed by the first parser_lex call and remembered
+ * as the opening token. `flags` is masked down to PM_PARSE_ACCEPTS_DO_BLOCK
+ * before being forwarded to the nested expression parsers, and every nested
+ * parse call receives `depth + 1`.
+ *
+ * Returns a singleton class node when `<<` follows the keyword, and a class
+ * node otherwise. In both paths the body is parsed inside a freshly pushed
+ * parser scope whose locals are ordered just before the scope is popped.
  */
-static void
-parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
-    const pm_string_t *unescaped = &node->unescaped;
-    parse_regular_expression_error_data_t error_data = {
-        .parser = parser,
-        .start = node->base.location.start,
-        .end = node->base.location.end,
-        .shared = unescaped->type == PM_STRING_SHARED
-    };
+static pm_node_t *
+parse_class(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
+    size_t opening_newline_index = token_newline_index(parser);
+    parser_lex(parser);
+
+    pm_token_t class_keyword = parser->previous;
+    pm_do_loop_stack_push(parser, false); /* popped again before each return below */
+
+    pm_node_list_t current_block_exits = { 0 };
+    pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+    if (accept1(parser, PM_TOKEN_LESS_LESS)) { /* singleton class: `class << expr` */
+        pm_token_t operator = parser->previous;
+        pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
+
+        pm_parser_scope_push(parser, true);
+        if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { /* only checked here; the terminator itself is not consumed */
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_str(parser->current.type));
+        }
+
+        pm_node_t *statements = NULL;
+        if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+            pm_accepts_block_stack_push(parser, true);
+            statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)));
+            pm_accepts_block_stack_pop(parser);
+        }
+
+        if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
+            assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+            statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)));
+        } else {
+            parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
+        }
+
+        expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
+
+        pm_constant_id_list_t locals;
+        pm_locals_order(parser, &parser->current_scope->locals, &locals, false); /* must run before the scope is popped */
+
+        pm_parser_scope_pop(parser);
+        pm_do_loop_stack_pop(parser);
+
+        flush_block_exits(parser, previous_block_exits); /* NOTE(review): this path flushes while the regular class path pops - confirm the asymmetry is intended */
+        return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous));
+    }
+
+    pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
+    pm_token_t name = parser->previous; /* last token consumed while parsing the class name */
+    if (name.type != PM_TOKEN_CONSTANT) {
+        pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
+    }
+
+    pm_token_t inheritance_operator = { 0 }; /* stays zeroed unless a `<` is seen */
+    pm_node_t *superclass;
+
+    if (match1(parser, PM_TOKEN_LESS)) {
+        inheritance_operator = parser->current;
+        lex_state_set(parser, PM_LEX_STATE_BEG);
+
+        parser->command_start = true;
+        parser_lex(parser);
+
+        superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
+    } else {
+        superclass = NULL;
+    }
+
+    pm_parser_scope_push(parser, true);
+
+    if (inheritance_operator.start != NULL) { /* a superclass was parsed, so a newline or semicolon is required */
+        expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
+    } else {
+        accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+    }
+    pm_node_t *statements = NULL;
+
+    if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+        pm_accepts_block_stack_push(parser, true);
+        statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1)));
+        pm_accepts_block_stack_pop(parser);
+    }
+
+    if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
+        assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+        statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)));
+    } else {
+        parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
+    }
+
+    expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
+
+    if (context_def_p(parser)) { /* adds PM_ERR_CLASS_IN_METHOD on the `class` keyword */
+        pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
+    }
 
-    pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
+    pm_constant_id_list_t locals;
+    pm_locals_order(parser, &parser->current_scope->locals, &locals, false); /* must run before the scope is popped */
+
+    pm_parser_scope_pop(parser);
+    pm_do_loop_stack_pop(parser);
+
+    if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) { /* name is neither a constant nor a constant path */
+        pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
+        if (!PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) { /* wrap it unless it is already a recovery node */
+            constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path));
+        }
+    }
+
+    pop_block_exits(parser, previous_block_exits);
+    return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, NTOK2PTR(inheritance_operator), superclass, statements, &parser->previous));
+}
+
+/**
+ * Parse a method definition expression (the `def` keyword).
+ *
+ * On entry the `def` keyword is the current token. The function parses, in
+ * order: the optional receiver and the method name, the parameter list (with
+ * or without parentheses), and then either an endless body (`def name = expr`)
+ * or a regular body terminated by `end`.
+ *
+ * `binding_power` is only consulted for versions before
+ * PM_OPTIONS_VERSION_CRUBY_4_0, when deciding whether the endless body may be
+ * a command call. Every nested parse call receives `depth + 1`.
+ *
+ * Every branch of the name switch pushes exactly one parser scope; it is
+ * popped near the end of the function, after the locals have been ordered.
+ * Likewise PM_CONTEXT_DEF_PARAMS is pushed once before the name is lexed and
+ * popped once by every branch of the parameter switch.
+ */
+static pm_node_t *
+parse_def(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, uint16_t depth) {
+    pm_node_list_t current_block_exits = { 0 };
+    pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+    pm_token_t def_keyword = parser->current;
+    size_t opening_newline_index = token_newline_index(parser);
+
+    pm_node_t *receiver = NULL;
+    pm_token_t operator = { 0 }; /* the `.` or `::` after a receiver; stays zeroed when there is none */
+    pm_token_t name;
+
+    /* This context is necessary for lexing `...` in a bare params correctly. It
+     * must be pushed before lexing the first param, so it is here. */
+    context_push(parser, PM_CONTEXT_DEF_PARAMS);
+    parser_lex(parser);
+
+    /* This will be false if the method name is not a valid identifier but could
+     * be followed by an operator. It is only cleared for instance, class, and
+     * global variable tokens, and an error is reported for those when no `.`
+     * or `::` follows them. */
+    bool valid_name = true;
+
+    switch (parser->current.type) {
+        case PM_CASE_OPERATOR:
+            pm_parser_scope_push(parser, true);
+            lex_state_set(parser, PM_LEX_STATE_ENDFN);
+            parser_lex(parser);
+
+            name = parser->previous;
+            break;
+        case PM_TOKEN_IDENTIFIER: {
+            parser_lex(parser);
+
+            if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) { /* the identifier is a receiver, not the method name */
+                receiver = parse_variable_call(parser);
+
+                pm_parser_scope_push(parser, true);
+                lex_state_set(parser, PM_LEX_STATE_FNAME);
+                parser_lex(parser);
+
+                operator = parser->previous;
+                name = parse_method_definition_name(parser);
+            } else {
+                pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
+                pm_parser_scope_push(parser, true);
+
+                name = parser->previous;
+            }
+
+            break;
+        }
+        case PM_TOKEN_INSTANCE_VARIABLE:
+        case PM_TOKEN_CLASS_VARIABLE:
+        case PM_TOKEN_GLOBAL_VARIABLE:
+            valid_name = false;
+            PRISM_FALLTHROUGH
+        case PM_TOKEN_CONSTANT:
+        case PM_TOKEN_KEYWORD_NIL:
+        case PM_TOKEN_KEYWORD_SELF:
+        case PM_TOKEN_KEYWORD_TRUE:
+        case PM_TOKEN_KEYWORD_FALSE:
+        case PM_TOKEN_KEYWORD___FILE__:
+        case PM_TOKEN_KEYWORD___LINE__:
+        case PM_TOKEN_KEYWORD___ENCODING__: {
+            pm_parser_scope_push(parser, true);
+            parser_lex(parser);
+
+            pm_token_t identifier = parser->previous;
+
+            if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) { /* the token is a receiver; build the matching read/literal node */
+                lex_state_set(parser, PM_LEX_STATE_FNAME);
+                parser_lex(parser);
+                operator = parser->previous;
+
+                switch (identifier.type) {
+                    case PM_TOKEN_CONSTANT:
+                        receiver = UP(pm_constant_read_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_INSTANCE_VARIABLE:
+                        receiver = UP(pm_instance_variable_read_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_CLASS_VARIABLE:
+                        receiver = UP(pm_class_variable_read_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_GLOBAL_VARIABLE:
+                        receiver = UP(pm_global_variable_read_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_KEYWORD_NIL:
+                        receiver = UP(pm_nil_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_KEYWORD_SELF:
+                        receiver = UP(pm_self_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_KEYWORD_TRUE:
+                        receiver = UP(pm_true_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_KEYWORD_FALSE:
+                        receiver = UP(pm_false_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_KEYWORD___FILE__:
+                        receiver = UP(pm_source_file_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_KEYWORD___LINE__:
+                        receiver = UP(pm_source_line_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_KEYWORD___ENCODING__:
+                        receiver = UP(pm_source_encoding_node_create(parser, &identifier));
+                        break;
+                    default:
+                        break;
+                }
+
+                name = parse_method_definition_name(parser);
+            } else {
+                if (!valid_name) {
+                    PM_PARSER_ERR_TOKEN_FORMAT(parser, &identifier, PM_ERR_DEF_NAME, pm_token_str(identifier.type));
+                }
+
+                name = identifier; /* the token itself is used as the method name */
+            }
+            break;
+        }
+        case PM_TOKEN_PARENTHESIS_LEFT: {
+            /* The current context is `PM_CONTEXT_DEF_PARAMS`, however the inner
+             * expression of this parenthesis should not be processed under this
+             * context. Thus, the context is popped here. */
+            context_pop(parser);
+            parser_lex(parser);
+
+            pm_token_t lparen = parser->previous;
+            pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
+
+            accept1(parser, PM_TOKEN_NEWLINE);
+            expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+            pm_token_t rparen = parser->previous;
+
+            lex_state_set(parser, PM_LEX_STATE_FNAME);
+            expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
+
+            operator = parser->previous;
+            receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0));
+
+            /* `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same reason
+             * as described above, now that the receiver has been parsed. */
+            pm_parser_scope_push(parser, true);
+            context_push(parser, PM_CONTEXT_DEF_PARAMS);
+            name = parse_method_definition_name(parser);
+            break;
+        }
+        default:
+            pm_parser_scope_push(parser, true);
+            name = parse_method_definition_name(parser);
+            break;
+    }
+
+    pm_token_t lparen = { 0 }; /* both stay zeroed when the parameters are not parenthesized */
+    pm_token_t rparen = { 0 };
+    pm_parameters_node_t *params;
+
+    bool accept_endless_def = true; /* cleared only when parameters are written without parentheses */
+    switch (parser->current.type) {
+        case PM_TOKEN_PARENTHESIS_LEFT: {
+            parser_lex(parser);
+            lparen = parser->previous;
+
+            if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+                params = NULL;
+            } else {
+                /* https://bugs.ruby-lang.org/issues/19107 */
+                bool allow_trailing_comma = parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1;
+                params = parse_parameters(
+                    parser,
+                    PM_BINDING_POWER_DEFINED,
+                    true,
+                    allow_trailing_comma,
+                    true,
+                    true,
+                    false,
+                    PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
+                    (uint16_t) (depth + 1)
+                );
+            }
+
+            lex_state_set(parser, PM_LEX_STATE_BEG);
+            parser->command_start = true;
+
+            context_pop(parser);
+            if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_str(parser->current.type));
+                parser->previous.start = parser->previous.end; /* collapse previous to a zero-width, type 0 token ... */
+                parser->previous.type = 0;
+            }
+
+            rparen = parser->previous; /* ... which is what gets recorded here when `)` was missing */
+            break;
+        }
+        case PM_CASE_PARAMETER: {
+            /* If we're about to lex a label, we need to add the label state to
+             * make sure the next newline is ignored. */
+            if (parser->current.type == PM_TOKEN_LABEL) {
+                lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
+            }
+
+            params = parse_parameters(
+                parser,
+                PM_BINDING_POWER_DEFINED,
+                false,
+                false,
+                true,
+                true,
+                false,
+                PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
+                (uint16_t) (depth + 1)
+            );
+
+            /* Reject `def * = 1` and similar. We have to specifically check for
+             * them because they create ambiguity with optional arguments. */
+            accept_endless_def = false;
+
+            context_pop(parser);
+            break;
+        }
+        default: {
+            params = NULL;
+            context_pop(parser);
+            break;
+        }
+    }
+
+    pm_node_t *statements = NULL;
+    pm_token_t equal = { 0 }; /* set only for an endless definition */
+    pm_token_t end_keyword = { 0 }; /* set only for a regular definition */
+
+    if (accept1(parser, PM_TOKEN_EQUAL)) { /* endless definition: `def name = expr` */
+        if (token_is_setter_name(&name)) {
+            pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
+        }
+        if (!accept_endless_def) {
+            pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
+        }
+        if (
+            parser->current_context->context == PM_CONTEXT_DEFAULT_PARAMS &&
+            parser->current_context->prev->context == PM_CONTEXT_BLOCK_PARAMETERS
+        ) {
+            PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &def_keyword), PM_TOKENS_LENGTH(&def_keyword, &parser->previous), PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition");
+        }
+        equal = parser->previous;
+
+        context_push(parser, PM_CONTEXT_DEF);
+        pm_do_loop_stack_push(parser, false);
+        statements = UP(pm_statements_node_create(parser));
+
+        uint8_t allow_flags;
+        if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
+            allow_flags = flags & PM_PARSE_ACCEPTS_COMMAND_CALL;
+        } else {
+            /* Allow `def foo = puts "Hello"` but not
+             * `private def foo = puts "Hello"` */
+            allow_flags = (binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION) ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0;
+        }
+
+        /* Inside a def body, we push true onto the accepts_block_stack so that
+         * `do` is lexed as PM_TOKEN_KEYWORD_DO (which can only start a block
+         * for primary-level constructs, not commands). During command argument
+         * parsing, the stack is pushed to false, causing `do` to be lexed as
+         * PM_TOKEN_KEYWORD_DO_BLOCK, which is not consumed inside the endless
+         * def body and instead left for the outer context. */
+        pm_accepts_block_stack_push(parser, true);
+        pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_flags | PM_PARSE_IN_ENDLESS_DEF, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
+        pm_accepts_block_stack_pop(parser);
+
+        /* If an unconsumed PM_TOKEN_KEYWORD_DO follows the body, it is an error
+         * (e.g., `def f = 1 do end`). PM_TOKEN_KEYWORD_DO_BLOCK is
+         * intentionally not caught here — it should bubble up to the outer
+         * context (e.g., `private def f = puts "Hello" do end` where the block
+         * attaches to `private`). The block is still parsed so that the error
+         * can be attached to the whole block node. */
+        if (accept1(parser, PM_TOKEN_KEYWORD_DO)) {
+            pm_block_node_t *block = parse_block(parser, (uint16_t) (depth + 1));
+            pm_parser_err_node(parser, UP(block), PM_ERR_DEF_ENDLESS_DO_BLOCK);
+        }
+
+        if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
+            context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
+
+            pm_token_t rescue_keyword = parser->previous;
+
+            /* In the Ruby grammar, the rescue value of an endless method
+             * command excludes and/or and in/=>. */
+            pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_MATCH + 1, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+            context_pop(parser);
+
+            statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value));
+        }
+
+        /* A nested endless def whose body is a command call (e.g.,
+         * `def f = def g = foo bar`) is a command assignment and cannot appear
+         * as a def body. */
+        if (PM_NODE_TYPE_P(statement, PM_DEF_NODE) && pm_command_call_value_p(statement)) {
+            PM_PARSER_ERR_NODE_FORMAT(parser, statement, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
+        }
+
+        pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
+        pm_do_loop_stack_pop(parser);
+        context_pop(parser);
+    } else {
+        if (lparen.start == NULL) { /* no parenthesized parameter list, so a newline or semicolon is required */
+            lex_state_set(parser, PM_LEX_STATE_BEG);
+            parser->command_start = true;
+            expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
+        } else {
+            accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+        }
+
+        pm_accepts_block_stack_push(parser, true);
+        pm_do_loop_stack_push(parser, false);
+
+        if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+            pm_accepts_block_stack_push(parser, true); /* NOTE(review): second push of the same value; balanced by the pop two lines below - confirm it is intended */
+            statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1)));
+            pm_accepts_block_stack_pop(parser);
+        }
+
+        if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
+            assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+            statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)));
+        } else {
+            parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
+        }
+
+        pm_accepts_block_stack_pop(parser);
+        pm_do_loop_stack_pop(parser);
+
+        expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword);
+        end_keyword = parser->previous;
+    }
+
+    pm_constant_id_list_t locals;
+    pm_locals_order(parser, &parser->current_scope->locals, &locals, false); /* must run before the scope is popped */
+    pm_parser_scope_pop(parser);
+
+    /* If the final character is `@` as is the case when defining methods to
+     * override the unary operators, we should ignore the @ in the same way we
+     * do for symbols. */
+    pm_constant_id_t name_id = pm_parser_constant_id_raw(parser, name.start, parse_operator_symbol_name(&name));
+
+    flush_block_exits(parser, previous_block_exits);
+
+    return UP(pm_def_node_create(
+        parser,
+        name_id,
+        &name,
+        receiver,
+        params,
+        statements,
+        &locals,
+        &def_keyword,
+        NTOK2PTR(operator),
+        NTOK2PTR(lparen),
+        NTOK2PTR(rparen),
+        NTOK2PTR(equal),
+        NTOK2PTR(end_keyword)
+    ));
+}
+
+/**
+ * Parse a module definition expression (the `module` keyword).
+ *
+ * The `module` keyword is consumed by the first parser_lex call and remembered
+ * as the opening token. If parsing the name yields an error recovery node, a
+ * module node with no locals and no body is returned immediately, using a
+ * zero-width token for both the name and the end keyword.
+ *
+ * Otherwise the body is parsed inside a freshly pushed parser scope, and
+ * PM_ERR_MODULE_IN_METHOD is added on the keyword when context_def_p() is
+ * true. `flags` is masked down to PM_PARSE_ACCEPTS_DO_BLOCK for the name
+ * expression, and every nested parse call receives `depth + 1`.
+ */
+static pm_node_t *
+parse_module(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
+    pm_node_list_t current_block_exits = { 0 };
+    pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+    size_t opening_newline_index = token_newline_index(parser);
+    parser_lex(parser);
+    pm_token_t module_keyword = parser->previous;
+
+    pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
+    pm_token_t name;
+
+    /* If we can recover from a syntax error that occurred while parsing the
+     * name of the module, then we'll handle that here. The block exits are
+     * popped first because this path returns before the common exit below. */
+    if (PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) {
+        pop_block_exits(parser, previous_block_exits);
+
+        pm_token_t missing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
+        return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing));
+    }
+
+    while (accept1(parser, PM_TOKEN_COLON_COLON)) { /* extend the path with each further `::Constant` segment */
+        pm_token_t double_colon = parser->previous;
+
+        expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
+        constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous));
+    }
+
+    /* Here we retrieve the name of the module. If it wasn't a constant, then
+     * it's possible that `module foo` was passed, which is a syntax error. We
+     * handle that here as well. */
+    name = parser->previous;
+    if (name.type != PM_TOKEN_CONSTANT) {
+        pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
+    }
+
+    if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE) && !PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) { /* wrap any other node kind in a recovery node */
+        constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path));
+    }
+
+    pm_parser_scope_push(parser, true);
+    accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
+    pm_node_t *statements = NULL;
+
+    if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+        pm_accepts_block_stack_push(parser, true);
+        statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1)));
+        pm_accepts_block_stack_pop(parser);
+    }
+
+    if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
+        assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+        statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)));
+    } else {
+        parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
+    }
+
+    pm_constant_id_list_t locals;
+    pm_locals_order(parser, &parser->current_scope->locals, &locals, false); /* must run before the scope is popped */
+
+    pm_parser_scope_pop(parser);
+    expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword); /* here the scope is popped before `end` is consumed */
+
+    if (context_def_p(parser)) { /* adds PM_ERR_MODULE_IN_METHOD on the `module` keyword */
+        pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
+    }
+
+    pop_block_exits(parser, previous_block_exits);
+
+    return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous));
+}
+
+/**
+ * Parse an interpolated word array literal (`%W[...]`).
+ */
+static pm_node_t *
+parse_string_array(pm_parser_t *parser, uint16_t depth) {
+    parser_lex(parser);
+    pm_token_t opening = parser->previous;
+    pm_array_node_t *array = pm_array_node_create(parser, &opening);
+
+    /* This is the current node that we are parsing that will be added to the
+     * list of elements. */
+    pm_node_t *current = NULL;
+
+    while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+        switch (parser->current.type) {
+            case PM_TOKEN_WORDS_SEP: {
+                /* Reset the explicit encoding if we hit a separator since each
+                 * element can have its own encoding. */
+                parser->explicit_encoding = NULL;
+
+                if (current == NULL) {
+                    /* If we hit a separator before we have any content, then we
+                     * don't need to do anything. */
+                } else {
+                    /* If we hit a separator after we've hit content, then we
+                     * need to append that content to the list and reset the
+                     * current node. */
+                    pm_array_node_elements_append(parser->arena, array, current);
+                    current = NULL;
+                }
+
+                parser_lex(parser);
+                break;
+            }
+            case PM_TOKEN_STRING_CONTENT: {
+                pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+                pm_node_flag_set(string, parse_unescaped_encoding(parser));
+                parser_lex(parser);
+
+                if (current == NULL) {
+                    /* If we hit content and the current node is NULL, then this
+                     * is the first string content we've seen. In that case
+                     * we're going to create a new string node and set that to
+                     * the current. */
+                    current = string;
+                } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+                    /* If we hit string content and the current node is an
+                     * interpolated string, then we need to append the string
+                     * content to the list of child nodes. */
+                    pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
+                } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+                    /* If we hit string content and the current node is a string
+                     * node, then we need to convert the current node into an
+                     * interpolated string and add the string content to the
+                     * list of child nodes. */
+                    pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
+                    pm_interpolated_string_node_append(parser, interpolated, current);
+                    pm_interpolated_string_node_append(parser, interpolated, string);
+                    current = UP(interpolated);
+                } else {
+                    assert(false && "unreachable");
+                }
+
+                break;
+            }
+            case PM_TOKEN_EMBVAR: {
+                if (current == NULL) {
+                    /* If we hit an embedded variable and the current node is
+                     * NULL, then this is the start of a new string. We'll set
+                     * the current node to a new interpolated string. */
+                    current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
+                } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+                    /* If we hit an embedded variable and the current node is a
+                     * string node, then we'll convert the current into an
+                     * interpolated string and add the string node to the list
+                     * of parts. */
+                    pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
+                    pm_interpolated_string_node_append(parser, interpolated, current);
+                    current = UP(interpolated);
+                } else {
+                    /* If we hit an embedded variable and the current node is an
+                     * interpolated string, then we'll just add the embedded
+                     * variable. */
+                }
+
+                pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+                pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
+                break;
+            }
+            case PM_TOKEN_EMBEXPR_BEGIN: {
+                if (current == NULL) {
+                    /* If we hit an embedded expression and the current node is
+                     * NULL, then this is the start of a new string. We'll set
+                     * the current node to a new interpolated string. */
+                    current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
+                } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+                    /* If we hit an embedded expression and the current node is
+                     * a string node, then we'll convert the current into an
+                     * interpolated string and add the string node to the list
+                     * of parts. */
+                    pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
+                    pm_interpolated_string_node_append(parser, interpolated, current);
+                    current = UP(interpolated);
+                } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+                    /* If we hit an embedded expression and the current node is
+                     * an interpolated string, then we'll just continue on. */
+                } else {
+                    assert(false && "unreachable");
+                }
+
+                pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+                pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
+                break;
+            }
+            default:
+                expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
+                parser_lex(parser);
+                break;
+        }
+    }
+
+    /* If we have a current node, then we need to append it to the list. */
+    if (current) {
+        pm_array_node_elements_append(parser->arena, array, current);
+    }
+
+    pm_token_t closing = parser->current;
+    if (match1(parser, PM_TOKEN_EOF)) {
+        pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
+        closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
+    } else {
+        expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
+    }
+
+    pm_array_node_close_set(parser, array, &closing);
+    return UP(array);
+}
+
+/**
+ * Parse an interpolated symbol array literal (`%I[...]`).
+ */
+static pm_node_t *
+parse_symbol_array(pm_parser_t *parser, uint16_t depth) {
+    parser_lex(parser);
+    pm_token_t opening = parser->previous;
+    pm_array_node_t *array = pm_array_node_create(parser, &opening);
+
+    /* This is the current node that we are parsing that will be added to the
+     * list of elements. */
+    pm_node_t *current = NULL;
+
+    while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+        switch (parser->current.type) {
+            case PM_TOKEN_WORDS_SEP: {
+                if (current == NULL) {
+                    /* If we hit a separator before we have any content, then we
+                     * don't need to do anything. */
+                } else {
+                    /* If we hit a separator after we've hit content, then we
+                     * need to append that content to the list and reset the
+                     * current node. */
+                    pm_array_node_elements_append(parser->arena, array, current);
+                    current = NULL;
+                }
+
+                parser_lex(parser);
+                break;
+            }
+            case PM_TOKEN_STRING_CONTENT: {
+                if (current == NULL) {
+                    /* If we hit content and the current node is NULL, then this
+                     * is the first string content we've seen. In that case
+                     * we're going to create a new symbol node and set that to
+                     * the current. */
+                    current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
+                    parser_lex(parser);
+                } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
+                    /* If we hit string content and the current node is an
+                     * interpolated symbol, then we need to append the string
+                     * content to the list of child nodes. */
+                    pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+                    parser_lex(parser);
+
+                    pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string);
+                } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+                    /* If we hit string content and the current node is a symbol
+                     * node, then we need to convert the current node into an
+                     * interpolated symbol whose parts are the symbol's content
+                     * (rebuilt as a string node) followed by the new content. */
+                    pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
+                    pm_token_t content = {
+                        .type = PM_TOKEN_STRING_CONTENT,
+                        .start = parser->start + cast->value_loc.start,
+                        .end = parser->start + cast->value_loc.start + cast->value_loc.length
+                    };
+
+                    pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
+                    pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL)); /* NOTE(review): sibling branches pass &parser->current before lexing — confirm &parser->previous is intended */
+                    parser_lex(parser);
+
+                    pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
+                    pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string);
+                    pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string);
+
+                    current = UP(interpolated);
+                } else {
+                    assert(false && "unreachable");
+                }
+
+                break;
+            }
+            case PM_TOKEN_EMBVAR: {
+                bool start_location_set = false;
+                if (current == NULL) {
+                    /* If we hit an embedded variable and the current node is
+                     * NULL, then this is the start of a new symbol. We'll set
+                     * the current node to a new interpolated symbol. */
+                    current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
+                } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+                    /* If we hit an embedded variable and the current node is a
+                     * symbol node, then we'll convert the current into an
+                     * interpolated symbol and add the symbol's content (as a
+                     * string node) to the list of parts. */
+                    pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
+
+                    current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
+                    pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
+                    PM_NODE_START_SET_NODE(interpolated, current);
+                    start_location_set = true;
+                    current = UP(interpolated);
+                } else {
+                    /* If we hit an embedded variable and the current node is an
+                     * interpolated symbol (not checked here), then we'll just
+                     * add the embedded variable. */
+                }
+
+                pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+                pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
+                if (!start_location_set) { /* NOTE(review): also taken when current was already interpolated — confirm intended */
+                    PM_NODE_START_SET_NODE(current, part);
+                }
+                break;
+            }
+            case PM_TOKEN_EMBEXPR_BEGIN: {
+                bool start_location_set = false;
+                if (current == NULL) {
+                    /* If we hit an embedded expression and the current node is
+                     * NULL, then this is the start of a new symbol. We'll set
+                     * the current node to a new interpolated symbol. */
+                    current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
+                } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+                    /* If we hit an embedded expression and the current node is
+                     * a symbol node, then we'll convert the current into an
+                     * interpolated symbol and add the symbol's content (as a
+                     * string node) to the list of parts. */
+                    pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
+
+                    current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
+                    pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
+                    PM_NODE_START_SET_NODE(interpolated, current);
+                    start_location_set = true;
+                    current = UP(interpolated);
+                } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
+                    /* If we hit an embedded expression and the current node is
+                     * an interpolated symbol, then we'll just continue on. */
+                } else {
+                    assert(false && "unreachable");
+                }
+
+                pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+                pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
+                if (!start_location_set) { /* NOTE(review): also taken when current was already interpolated — confirm intended */
+                    PM_NODE_START_SET_NODE(current, part);
+                }
+                break;
+            }
+            default:
+                expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
+                parser_lex(parser);
+                break;
+        }
+    }
+
+    /* If we have a current node, then we need to append it to the list. */
+    if (current) {
+        pm_array_node_elements_append(parser->arena, array, current);
+    }
+
+    pm_token_t closing = parser->current;
+    if (match1(parser, PM_TOKEN_EOF)) {
+        pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
+        closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
+    } else {
+        expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
+    }
+    pm_array_node_close_set(parser, array, &closing);
+
+    return UP(array);
+}
+
+/**
+ * Parse a parenthesized expression, which could be a grouping, a multi-target
+ * assignment, or a set of statements.
+ */
+static pm_node_t *
+parse_parentheses(pm_parser_t *parser, pm_binding_power_t binding_power, uint16_t depth) {
+    pm_token_t opening = parser->current;
+    pm_node_flags_t paren_flags = 0;
+
+    pm_node_list_t current_block_exits = { 0 };
+    pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+    parser_lex(parser);
+    while (true) {
+        if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+            paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+        } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
+            break;
+        }
+    }
+
+    /* If this is the end of the file or we match a right parenthesis, then we
+     * have an empty parentheses node, and we can immediately return. */
+    if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
+        expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+        pop_block_exits(parser, previous_block_exits);
+        return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, paren_flags));
+    }
+
+    /* Otherwise, we're going to parse the first statement in the list of
+     * statements within the parentheses. */
+    pm_accepts_block_stack_push(parser, true);
+    context_push(parser, PM_CONTEXT_PARENS);
+    pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
+    context_pop(parser);
+
+    /* Determine if this statement is followed by a terminator. In the case of a
+     * single statement, this is optional. But in the case of multiple
+     * statements it's required (a semicolon also sets the multiple flag). */
+    bool terminator_found = false;
+
+    if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+        terminator_found = true;
+        paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+    } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
+        terminator_found = true;
+    }
+
+    if (terminator_found) {
+        while (true) {
+            if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+                paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+            } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
+                break;
+            }
+        }
+    }
+
+    /* If we hit a right parenthesis, then we're done parsing the parentheses
+     * node, and we can check which kind of node we should return. */
+    if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+        if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
+            lex_state_set(parser, PM_LEX_STATE_ENDARG);
+        }
+
+        parser_lex(parser);
+        pm_accepts_block_stack_pop(parser);
+        pop_block_exits(parser, previous_block_exits);
+
+        if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
+            /* If we have a single statement and are ending on a right
+             * parenthesis, then we need to check if this is possibly a multiple
+             * target node. */
+            pm_multi_target_node_t *multi_target;
+
+            if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.length == 0) {
+                multi_target = (pm_multi_target_node_t *) statement;
+            } else {
+                multi_target = pm_multi_target_node_create(parser);
+                pm_multi_target_node_targets_append(parser, multi_target, statement);
+            }
+
+            multi_target->lparen_loc = TOK2LOC(parser, &opening);
+            multi_target->rparen_loc = TOK2LOC(parser, &parser->previous);
+            PM_NODE_START_SET_TOKEN(parser, multi_target, &opening);
+            PM_NODE_LENGTH_SET_TOKEN(parser, multi_target, &parser->previous);
+
+            pm_node_t *result;
+            if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
+                result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+                accept1(parser, PM_TOKEN_NEWLINE);
+            } else {
+                result = UP(multi_target);
+            }
+
+            if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
+                /* All set, this is explicitly allowed by the parent context. */
+            } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
+                /* All set, we're inside a for loop and we're parsing multiple
+                 * targets. */
+            } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
+                /* Multi targets are not allowed when it's not a statement
+                 * level. */
+                pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
+            } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
+                /* Multi targets must be followed by an equal sign in order to
+                 * be valid (or a right parenthesis if they are nested). */
+                pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
+            }
+
+            return result;
+        }
+
+        /* If we have a single statement and are ending on a right parenthesis
+         * and we didn't return a multiple assignment node, then we can return a
+         * regular parentheses node now. */
+        pm_statements_node_t *statements = pm_statements_node_create(parser);
+        pm_statements_node_body_append(parser, statements, statement, true);
+
+        return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags));
+    }
+
+    /* If we have more than one statement in the set of parentheses, then we are
+     * going to parse all of them as a list of statements. We'll do that here.
+     */
+    context_push(parser, PM_CONTEXT_PARENS); /* re-entered: it was popped after the first statement above */
+    paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+
+    pm_statements_node_t *statements = pm_statements_node_create(parser);
+    pm_statements_node_body_append(parser, statements, statement, true);
+
+    /* If we didn't find a terminator and we didn't find a right parenthesis,
+     * then this is a syntax error (EOF is exempt; `)` is expected below). */
+    if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
+        PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
+    }
+
+    /* Parse each statement within the parentheses. */
+    while (true) {
+        pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
+        pm_statements_node_body_append(parser, statements, node, true);
+
+        /* If we're recovering from a syntax error, then we need to stop parsing
+         * the statements now. */
+        if (parser->recovering) {
+            /* If this is the level of context where the recovery has happened,
+             * then we can mark the parser as done recovering. */
+            if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
+            break;
+        }
+
+        /* If we couldn't parse an expression at all, then we need to bail out
+         * of the loop. */
+        if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) break;
+
+        /* If we successfully parsed a statement, then we are going to need a
+         * terminator to delimit them. */
+        if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+            while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
+            if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
+        } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+            break;
+        } else if (!match1(parser, PM_TOKEN_EOF)) {
+            /* If we're at the end of the file, then we're going to add an error
+             * after this for the ) anyway. */
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
+        }
+    }
+
+    context_pop(parser);
+    pm_accepts_block_stack_pop(parser);
+    expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+
+    /* When we're parsing multi targets, we allow them to be followed by a right
+     * parenthesis if they are at the statement level. This is only possible if
+     * they are the final statement in a parentheses. We need to explicitly
+     * reject that here. */
+    {
+        pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
+
+        if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
+            pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
+            pm_multi_target_node_targets_append(parser, multi_target, statement);
+
+            statement = UP(multi_target);
+            statements->body.nodes[statements->body.size - 1] = statement;
+        }
+
+        if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
+            const uint8_t *offset = parser->start + PM_NODE_END(statement);
+            pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
+            pm_node_t *value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(statement), 0));
+
+            statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value));
+            statements->body.nodes[statements->body.size - 1] = statement;
+
+            pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
+        }
+    }
+
+    pop_block_exits(parser, previous_block_exits);
+    pm_void_statements_check(parser, statements, true);
+    return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags));
 }
 
 /**
  * Parse an expression that begins with the previous node that we just lexed.
  */
-static inline pm_node_t *
-parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
+static PRISM_INLINE pm_node_t *
+parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
     switch (parser->current.type) {
         case PM_TOKEN_BRACKET_LEFT_ARRAY: {
             parser_lex(parser);
@@ -18092,11 +19089,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                     } else {
                         // If there was no comma, then we need to add a syntax
                         // error.
-                        const uint8_t *location = parser->previous.end;
-                        PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
-
-                        parser->previous.start = location;
-                        parser->previous.type = PM_TOKEN_MISSING;
+                        PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_END(parser, &parser->previous), 0, PM_ERR_ARRAY_SEPARATOR, pm_token_str(parser->current.type));
+                        parser->previous.start = parser->previous.end;
+                        parser->previous.type = 0;
                     }
                 }
 
@@ -18114,28 +19109,28 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                     if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
                         pm_parser_scope_forwarding_positionals_check(parser, &operator);
                     } else {
-                        expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+                        expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
                     }
 
-                    element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
+                    element = UP(pm_splat_node_create(parser, &operator, expression));
                 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
                     if (parsed_bare_hash) {
                         pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
                     }
 
-                    element = (pm_node_t *) pm_keyword_hash_node_create(parser);
+                    element = UP(pm_keyword_hash_node_create(parser));
                     pm_static_literals_t hash_keys = { 0 };
 
-                    if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
+                    if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
                         parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
                     }
 
                     pm_static_literals_free(&hash_keys);
                     parsed_bare_hash = true;
                 } else {
-                    element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
+                    element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_LABEL), PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
 
-                    if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
+                    if (pm_symbol_node_label_p(parser, element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
                         if (parsed_bare_hash) {
                             pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
                         }
@@ -18144,18 +19139,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                         pm_static_literals_t hash_keys = { 0 };
                         pm_hash_key_static_literals_add(parser, &hash_keys, element);
 
-                        pm_token_t operator;
+                        pm_token_t operator = { 0 };
                         if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
                             operator = parser->previous;
-                        } else {
-                            operator = not_provided(parser);
                         }
 
-                        pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
-                        pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
-                        pm_keyword_hash_node_elements_append(hash, assoc);
+                        pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
+                        pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, NTOK2PTR(operator), value));
+                        pm_keyword_hash_node_elements_append(parser->arena, hash, assoc);
 
-                        element = (pm_node_t *) hash;
+                        element = UP(hash);
                         if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
                             parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
                         }
@@ -18165,236 +19158,26 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                     }
                 }
 
-                pm_array_node_elements_append(array, element);
-                if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
+                pm_array_node_elements_append(parser->arena, array, element);
+                if (PM_NODE_TYPE_P(element, PM_ERROR_RECOVERY_NODE)) break;
             }
 
             accept1(parser, PM_TOKEN_NEWLINE);
 
             if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
-                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_str(parser->current.type));
                 parser->previous.start = parser->previous.end;
-                parser->previous.type = PM_TOKEN_MISSING;
+                parser->previous.type = 0;
             }
 
-            pm_array_node_close_set(array, &parser->previous);
+            pm_array_node_close_set(parser, array, &parser->previous);
             pm_accepts_block_stack_pop(parser);
 
-            return (pm_node_t *) array;
+            return UP(array);
         }
         case PM_TOKEN_PARENTHESIS_LEFT:
-        case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
-            pm_token_t opening = parser->current;
-            pm_node_flags_t flags = 0;
-
-            pm_node_list_t current_block_exits = { 0 };
-            pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
-            parser_lex(parser);
-            while (true) {
-                if (accept1(parser, PM_TOKEN_SEMICOLON)) {
-                    flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
-                } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
-                    break;
-                }
-            }
-
-            // If this is the end of the file or we match a right parenthesis, then
-            // we have an empty parentheses node, and we can immediately return.
-            if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
-                expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
-
-                pop_block_exits(parser, previous_block_exits);
-                pm_node_list_free(&current_block_exits);
-
-                return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags);
-            }
-
-            // Otherwise, we're going to parse the first statement in the list
-            // of statements within the parentheses.
-            pm_accepts_block_stack_push(parser, true);
-            context_push(parser, PM_CONTEXT_PARENS);
-            pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
-            context_pop(parser);
-
-            // Determine if this statement is followed by a terminator. In the
-            // case of a single statement, this is fine. But in the case of
-            // multiple statements it's required.
-            bool terminator_found = false;
-
-            if (accept1(parser, PM_TOKEN_SEMICOLON)) {
-                terminator_found = true;
-                flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
-            } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
-                terminator_found = true;
-            }
-
-            if (terminator_found) {
-                while (true) {
-                    if (accept1(parser, PM_TOKEN_SEMICOLON)) {
-                        flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
-                    } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
-                        break;
-                    }
-                }
-            }
-
-            // If we hit a right parenthesis, then we're done parsing the
-            // parentheses node, and we can check which kind of node we should
-            // return.
-            if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
-                if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
-                    lex_state_set(parser, PM_LEX_STATE_ENDARG);
-                }
-
-                parser_lex(parser);
-                pm_accepts_block_stack_pop(parser);
-
-                pop_block_exits(parser, previous_block_exits);
-                pm_node_list_free(&current_block_exits);
-
-                if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
-                    // If we have a single statement and are ending on a right
-                    // parenthesis, then we need to check if this is possibly a
-                    // multiple target node.
-                    pm_multi_target_node_t *multi_target;
-
-                    if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
-                        multi_target = (pm_multi_target_node_t *) statement;
-                    } else {
-                        multi_target = pm_multi_target_node_create(parser);
-                        pm_multi_target_node_targets_append(parser, multi_target, statement);
-                    }
-
-                    pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
-                    pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
-
-                    multi_target->lparen_loc = lparen_loc;
-                    multi_target->rparen_loc = rparen_loc;
-                    multi_target->base.location.start = lparen_loc.start;
-                    multi_target->base.location.end = rparen_loc.end;
-
-                    pm_node_t *result;
-                    if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
-                        result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
-                        accept1(parser, PM_TOKEN_NEWLINE);
-                    } else {
-                        result = (pm_node_t *) multi_target;
-                    }
-
-                    if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
-                        // All set, this is explicitly allowed by the parent
-                        // context.
-                    } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
-                        // All set, we're inside a for loop and we're parsing
-                        // multiple targets.
-                    } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
-                        // Multi targets are not allowed when it's not a
-                        // statement level.
-                        pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
-                    } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
-                        // Multi targets must be followed by an equal sign in
-                        // order to be valid (or a right parenthesis if they are
-                        // nested).
-                        pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
-                    }
-
-                    return result;
-                }
-
-                // If we have a single statement and are ending on a right parenthesis
-                // and we didn't return a multiple assignment node, then we can return a
-                // regular parentheses node now.
-                pm_statements_node_t *statements = pm_statements_node_create(parser);
-                pm_statements_node_body_append(parser, statements, statement, true);
-
-                return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
-            }
-
-            // If we have more than one statement in the set of parentheses,
-            // then we are going to parse all of them as a list of statements.
-            // We'll do that here.
-            context_push(parser, PM_CONTEXT_PARENS);
-            flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
-
-            pm_statements_node_t *statements = pm_statements_node_create(parser);
-            pm_statements_node_body_append(parser, statements, statement, true);
-
-            // If we didn't find a terminator and we didn't find a right
-            // parenthesis, then this is a syntax error.
-            if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
-                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
-            }
-
-            // Parse each statement within the parentheses.
-            while (true) {
-                pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
-                pm_statements_node_body_append(parser, statements, node, true);
-
-                // If we're recovering from a syntax error, then we need to stop
-                // parsing the statements now.
-                if (parser->recovering) {
-                    // If this is the level of context where the recovery has
-                    // happened, then we can mark the parser as done recovering.
-                    if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
-                    break;
-                }
-
-                // If we couldn't parse an expression at all, then we need to
-                // bail out of the loop.
-                if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
-
-                // If we successfully parsed a statement, then we are going to
-                // need terminator to delimit them.
-                if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
-                    while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
-                    if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
-                } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
-                    break;
-                } else if (!match1(parser, PM_TOKEN_EOF)) {
-                    // If we're at the end of the file, then we're going to add
-                    // an error after this for the ) anyway.
-                    PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
-                }
-            }
-
-            context_pop(parser);
-            pm_accepts_block_stack_pop(parser);
-            expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
-
-            // When we're parsing multi targets, we allow them to be followed by
-            // a right parenthesis if they are at the statement level. This is
-            // only possible if they are the final statement in a parentheses.
-            // We need to explicitly reject that here.
-            {
-                pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
-
-                if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
-                    pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
-                    pm_multi_target_node_targets_append(parser, multi_target, statement);
-
-                    statement = (pm_node_t *) multi_target;
-                    statements->body.nodes[statements->body.size - 1] = statement;
-                }
-
-                if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
-                    const uint8_t *offset = statement->location.end;
-                    pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
-                    pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
-
-                    statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
-                    statements->body.nodes[statements->body.size - 1] = statement;
-
-                    pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
-                }
-            }
-
-            pop_block_exits(parser, previous_block_exits);
-            pm_node_list_free(&current_block_exits);
-
-            pm_void_statements_check(parser, statements, true);
-            return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
-        }
+        case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
+            return parse_parentheses(parser, binding_power, depth);
         case PM_TOKEN_BRACE_LEFT: {
             // If we were passed a current_hash_keys via the parser, then that
             // means we're already parsing a hash and we want to share the set
@@ -18409,14 +19192,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             pm_accepts_block_stack_push(parser, true);
             parser_lex(parser);
 
-            pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
+            pm_token_t opening = parser->previous;
+            pm_hash_node_t *node = pm_hash_node_create(parser, &opening);
 
             if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
                 if (current_hash_keys != NULL) {
-                    parse_assocs(parser, current_hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
+                    parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1));
                 } else {
                     pm_static_literals_t hash_keys = { 0 };
-                    parse_assocs(parser, &hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
+                    parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1));
                     pm_static_literals_free(&hash_keys);
                 }
 
@@ -18424,26 +19208,33 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             }
 
             pm_accepts_block_stack_pop(parser);
-            expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
-            pm_hash_node_closing_loc_set(node, &parser->previous);
+            expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening);
+            pm_hash_node_closing_loc_set(parser, node, &parser->previous);
 
-            return (pm_node_t *) node;
+            return UP(node);
         }
         case PM_TOKEN_CHARACTER_LITERAL: {
-            parser_lex(parser);
-
-            pm_token_t opening = parser->previous;
-            opening.type = PM_TOKEN_STRING_BEGIN;
-            opening.end = opening.start + 1;
-
-            pm_token_t content = parser->previous;
-            content.type = PM_TOKEN_STRING_CONTENT;
-            content.start = content.start + 1;
+            pm_node_t *node = UP(pm_string_node_create_current_string(
+                parser,
+                &(pm_token_t) {
+                    .type = PM_TOKEN_STRING_BEGIN,
+                    .start = parser->current.start,
+                    .end = parser->current.start + 1
+                },
+                &(pm_token_t) {
+                    .type = PM_TOKEN_STRING_CONTENT,
+                    .start = parser->current.start + 1,
+                    .end = parser->current.end
+                },
+                NULL
+            ));
 
-            pm_token_t closing = not_provided(parser);
-            pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
             pm_node_flag_set(node, parse_unescaped_encoding(parser));
 
+            // Skip past the character literal here, since now we have handled
+            // parser->explicit_encoding correctly.
+            parser_lex(parser);
+
             // Characters can be followed by strings in which case they are
             // automatically concatenated.
             if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
@@ -18454,7 +19245,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
         }
         case PM_TOKEN_CLASS_VARIABLE: {
             parser_lex(parser);
-            pm_node_t *node = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
+            pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->previous));
 
             if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
                 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -18470,16 +19261,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             // fact a method call, not a constant read.
             if (
                 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
-                (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
+                ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
                 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
                 match1(parser, PM_TOKEN_BRACE_LEFT)
             ) {
                 pm_arguments_t arguments = { 0 };
-                parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
-                return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
+                parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
+                return UP(pm_call_node_fcall_create(parser, &constant, &arguments));
             }
 
-            pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
+            pm_node_t *node = UP(pm_constant_read_node_create(parser, &parser->previous));
 
             if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
                 // If we get here, then we have a comma immediately following a
@@ -18494,7 +19285,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             pm_token_t delimiter = parser->previous;
 
             expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
-            pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
+            pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous));
 
             if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
                 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -18507,7 +19298,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             pm_token_t operator = parser->current;
             parser_lex(parser);
 
-            pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+            pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
 
             // Unary .. and ... are special because these are non-associative
             // operators that can also be unary operators. In this case we need
@@ -18517,23 +19308,23 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
             }
 
-            return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
+            return UP(pm_range_node_create(parser, NULL, &operator, right));
         }
         case PM_TOKEN_FLOAT:
             parser_lex(parser);
-            return (pm_node_t *) pm_float_node_create(parser, &parser->previous);
+            return UP(pm_float_node_create(parser, &parser->previous));
         case PM_TOKEN_FLOAT_IMAGINARY:
             parser_lex(parser);
-            return (pm_node_t *) pm_float_node_imaginary_create(parser, &parser->previous);
+            return UP(pm_float_node_imaginary_create(parser, &parser->previous));
         case PM_TOKEN_FLOAT_RATIONAL:
             parser_lex(parser);
-            return (pm_node_t *) pm_float_node_rational_create(parser, &parser->previous);
+            return UP(pm_float_node_rational_create(parser, &parser->previous));
         case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
             parser_lex(parser);
-            return (pm_node_t *) pm_float_node_rational_imaginary_create(parser, &parser->previous);
+            return UP(pm_float_node_rational_imaginary_create(parser, &parser->previous));
         case PM_TOKEN_NUMBERED_REFERENCE: {
             parser_lex(parser);
-            pm_node_t *node = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
+            pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
 
             if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
                 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -18543,7 +19334,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
         }
         case PM_TOKEN_GLOBAL_VARIABLE: {
             parser_lex(parser);
-            pm_node_t *node = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
+            pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->previous));
 
             if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
                 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -18553,7 +19344,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
         }
         case PM_TOKEN_BACK_REFERENCE: {
             parser_lex(parser);
-            pm_node_t *node = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
+            pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->previous));
 
             if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
                 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -18575,26 +19366,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 pm_call_node_t *call = (pm_call_node_t *) node;
                 pm_arguments_t arguments = { 0 };
 
-                if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
+                if (parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1))) {
                     // Since we found arguments, we need to turn off the
                     // variable call bit in the flags.
-                    pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
+                    pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL);
 
                     call->opening_loc = arguments.opening_loc;
                     call->arguments = arguments.arguments;
                     call->closing_loc = arguments.closing_loc;
                     call->block = arguments.block;
 
-                    if (arguments.block != NULL) {
-                        call->base.location.end = arguments.block->location.end;
-                    } else if (arguments.closing_loc.start == NULL) {
-                        if (arguments.arguments != NULL) {
-                            call->base.location.end = arguments.arguments->base.location.end;
-                        } else {
-                            call->base.location.end = call->message_loc.end;
-                        }
+                    const pm_location_t *end = pm_arguments_end(&arguments);
+                    if (end == NULL) {
+                        PM_NODE_LENGTH_SET_LOCATION(call, &call->message_loc);
                     } else {
-                        call->base.location.end = arguments.closing_loc.end;
+                        PM_NODE_LENGTH_SET_LOCATION(call, end);
                     }
                 }
             } else {
@@ -18602,19 +19388,19 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 // can still be a method call if it is followed by arguments or
                 // a block, so we need to check for that here.
                 if (
-                    (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
+                    ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
                     (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
                     match1(parser, PM_TOKEN_BRACE_LEFT)
                 ) {
                     pm_arguments_t arguments = { 0 };
-                    parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+                    parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
                     pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
 
                     if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
                         // If we're about to convert an 'it' implicit local
                         // variable read into a method call, we need to remove
                         // it from the list of implicit local variables.
-                        parse_target_implicit_parameter(parser, node);
+                        pm_node_unreference(parser, node);
                     } else {
                         // Otherwise, we're about to convert a regular local
                         // variable read into a method call, in which case we
@@ -18622,16 +19408,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                         // purposes of warnings.
                         assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
 
-                        if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
-                            parse_target_implicit_parameter(parser, node);
+                        if (pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &identifier), PM_TOKEN_LENGTH(&identifier))) {
+                            pm_node_unreference(parser, node);
                         } else {
                             pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
                             pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
                         }
                     }
 
-                    pm_node_destroy(parser, node);
-                    return (pm_node_t *) fcall;
+                    return UP(fcall);
                 }
             }
 
@@ -18663,12 +19448,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 pm_token_t content = parse_strings_empty_content(parser->previous.start);
 
                 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
-                    node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
+                    node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
                 } else {
-                    node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
+                    node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
                 }
 
-                node->location.end = opening.end;
+                PM_NODE_LENGTH_SET_TOKEN(parser, node, &opening);
             } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
                 // If we get here, then we tried to find something in the
                 // heredoc but couldn't actually parse anything, so we'll just
@@ -18676,7 +19461,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 //
                 // parse_string_part handles its own errors, so there is no need
                 // for us to add one here.
-                node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+                node = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
             } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
                 // If we get here, then the part that we parsed was plain string
                 // content and we're at the end of the heredoc, so we can return
@@ -18685,8 +19470,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 pm_node_flag_set(part, parse_unescaped_encoding(parser));
                 pm_string_node_t *cast = (pm_string_node_t *) part;
 
-                cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
-                cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
+                cast->opening_loc = TOK2LOC(parser, &opening);
+                cast->closing_loc = TOK2LOC(parser, &parser->current);
                 cast->base.location = cast->opening_loc;
 
                 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
@@ -18695,21 +19480,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 }
 
                 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
-                    parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
+                    parse_heredoc_dedent_string(parser->arena, &cast->unescaped, common_whitespace);
                 }
 
-                node = (pm_node_t *) cast;
+                node = UP(cast);
                 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
             } else {
                 // If we get here, then we have multiple parts in the heredoc,
                 // so we'll need to create an interpolated string node to hold
                 // them all.
                 pm_node_list_t parts = { 0 };
-                pm_node_list_append(&parts, part);
+                pm_node_list_append(parser->arena, &parts, part);
 
                 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
                     if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
-                        pm_node_list_append(&parts, part);
+                        pm_node_list_append(parser->arena, &parts, part);
                     }
                 }
 
@@ -18720,19 +19505,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                     cast->parts = parts;
 
                     expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
-                    pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
+                    pm_interpolated_xstring_node_closing_set(parser, cast, &parser->previous);
 
                     cast->base.location = cast->opening_loc;
-                    node = (pm_node_t *) cast;
+                    node = UP(cast);
                 } else {
                     pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
-                    pm_node_list_free(&parts);
 
                     expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
-                    pm_interpolated_string_node_closing_set(cast, &parser->previous);
+                    pm_interpolated_string_node_closing_set(parser, cast, &parser->previous);
 
                     cast->base.location = cast->opening_loc;
-                    node = (pm_node_t *) cast;
+                    node = UP(cast);
                 }
 
                 // If this is a heredoc that is indented with a ~, then we need
@@ -18757,7 +19541,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
         }
         case PM_TOKEN_INSTANCE_VARIABLE: {
             parser_lex(parser);
-            pm_node_t *node = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
+            pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
 
             if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
                 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -18766,34 +19550,34 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             return node;
         }
         case PM_TOKEN_INTEGER: {
-            pm_node_flags_t base = parser->integer_base;
+            pm_node_flags_t base = parser->integer.base;
             parser_lex(parser);
-            return (pm_node_t *) pm_integer_node_create(parser, base, &parser->previous);
+            return UP(pm_integer_node_create(parser, base, &parser->previous));
         }
         case PM_TOKEN_INTEGER_IMAGINARY: {
-            pm_node_flags_t base = parser->integer_base;
+            pm_node_flags_t base = parser->integer.base;
             parser_lex(parser);
-            return (pm_node_t *) pm_integer_node_imaginary_create(parser, base, &parser->previous);
+            return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous));
         }
         case PM_TOKEN_INTEGER_RATIONAL: {
-            pm_node_flags_t base = parser->integer_base;
+            pm_node_flags_t base = parser->integer.base;
             parser_lex(parser);
-            return (pm_node_t *) pm_integer_node_rational_create(parser, base, &parser->previous);
+            return UP(pm_integer_node_rational_create(parser, base, &parser->previous));
         }
         case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
-            pm_node_flags_t base = parser->integer_base;
+            pm_node_flags_t base = parser->integer.base;
             parser_lex(parser);
-            return (pm_node_t *) pm_integer_node_rational_imaginary_create(parser, base, &parser->previous);
+            return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous));
         }
         case PM_TOKEN_KEYWORD___ENCODING__:
             parser_lex(parser);
-            return (pm_node_t *) pm_source_encoding_node_create(parser, &parser->previous);
+            return UP(pm_source_encoding_node_create(parser, &parser->previous));
         case PM_TOKEN_KEYWORD___FILE__:
             parser_lex(parser);
-            return (pm_node_t *) pm_source_file_node_create(parser, &parser->previous);
+            return UP(pm_source_file_node_create(parser, &parser->previous));
         case PM_TOKEN_KEYWORD___LINE__:
             parser_lex(parser);
-            return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
+            return UP(pm_source_line_node_create(parser, &parser->previous));
         case PM_TOKEN_KEYWORD_ALIAS: {
             if (binding_power != PM_BINDING_POWER_STATEMENT) {
                 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
@@ -18813,245 +19597,27 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                         if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
                             pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
                         }
-                    } else {
+                    } else if (!PM_NODE_TYPE_P(old_name, PM_ERROR_RECOVERY_NODE)) {
                         pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
+                        old_name = UP(pm_error_recovery_node_create_unexpected(parser, old_name));
                     }
 
-                    return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
+                    return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name));
                 }
                 case PM_SYMBOL_NODE:
                 case PM_INTERPOLATED_SYMBOL_NODE: {
-                    if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
+                    if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_ERROR_RECOVERY_NODE)) {
                         pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
+                        old_name = UP(pm_error_recovery_node_create_unexpected(parser, old_name));
                     }
                 }
                 PRISM_FALLTHROUGH
                 default:
-                    return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
-            }
-        }
-        case PM_TOKEN_KEYWORD_CASE: {
-            size_t opening_newline_index = token_newline_index(parser);
-            parser_lex(parser);
-
-            pm_token_t case_keyword = parser->previous;
-            pm_node_t *predicate = NULL;
-
-            pm_node_list_t current_block_exits = { 0 };
-            pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
-            if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
-                while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
-                predicate = NULL;
-            } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
-                predicate = NULL;
-             } else if (!token_begins_expression_p(parser->current.type)) {
-                predicate = NULL;
-            } else {
-                predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
-                while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
-            }
-
-            if (match1(parser, PM_TOKEN_KEYWORD_END)) {
-                parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
-                parser_lex(parser);
-
-                pop_block_exits(parser, previous_block_exits);
-                pm_node_list_free(&current_block_exits);
-
-                pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
-                return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
-            }
-
-            // At this point we can create a case node, though we don't yet know
-            // if it is a case-in or case-when node.
-            pm_token_t end_keyword = not_provided(parser);
-            pm_node_t *node;
-
-            if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
-                pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
-                pm_static_literals_t literals = { 0 };
-
-                // At this point we've seen a when keyword, so we know this is a
-                // case-when node. We will continue to parse the when nodes
-                // until we hit the end of the list.
-                while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
-                    parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
-                    parser_lex(parser);
-
-                    pm_token_t when_keyword = parser->previous;
-                    pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
-
-                    do {
-                        if (accept1(parser, PM_TOKEN_USTAR)) {
-                            pm_token_t operator = parser->previous;
-                            pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
-
-                            pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
-                            pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
-
-                            if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
-                        } else {
-                            pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
-                            pm_when_node_conditions_append(when_node, condition);
-
-                            // If we found a missing node, then this is a syntax
-                            // error and we should stop looping.
-                            if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
-
-                            // If this is a string node, then we need to mark it
-                            // as frozen because when clause strings are frozen.
-                            if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
-                                pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
-                            } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
-                                pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
-                            }
-
-                            pm_when_clause_static_literals_add(parser, &literals, condition);
-                        }
-                    } while (accept1(parser, PM_TOKEN_COMMA));
-
-                    if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
-                        if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
-                            pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
-                        }
-                    } else {
-                        expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
-                        pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
-                    }
-
-                    if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
-                        pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
-                        if (statements != NULL) {
-                            pm_when_node_statements_set(when_node, statements);
-                        }
-                    }
-
-                    pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
-                }
-
-                // If we didn't parse any conditions (in or when) then we need
-                // to indicate that we have an error.
-                if (case_node->conditions.size == 0) {
-                    pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
-                }
-
-                pm_static_literals_free(&literals);
-                node = (pm_node_t *) case_node;
-            } else {
-                pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
-
-                // If this is a case-match node (i.e., it is a pattern matching
-                // case statement) then we must have a predicate.
-                if (predicate == NULL) {
-                    pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
-                }
-
-                // At this point we expect that we're parsing a case-in node. We
-                // will continue to parse the in nodes until we hit the end of
-                // the list.
-                while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
-                    parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
-
-                    bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
-                    parser->pattern_matching_newlines = true;
-
-                    lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
-                    parser->command_start = false;
-                    parser_lex(parser);
-
-                    pm_token_t in_keyword = parser->previous;
-
-                    pm_constant_id_list_t captures = { 0 };
-                    pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
-
-                    parser->pattern_matching_newlines = previous_pattern_matching_newlines;
-                    pm_constant_id_list_free(&captures);
-
-                    // Since we're in the top-level of the case-in node we need
-                    // to check for guard clauses in the form of `if` or
-                    // `unless` statements.
-                    if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
-                        pm_token_t keyword = parser->previous;
-                        pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
-                        pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
-                    } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
-                        pm_token_t keyword = parser->previous;
-                        pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
-                        pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
-                    }
-
-                    // Now we need to check for the terminator of the in node's
-                    // pattern. It can be a newline or semicolon optionally
-                    // followed by a `then` keyword.
-                    pm_token_t then_keyword;
-                    if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
-                        if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
-                            then_keyword = parser->previous;
-                        } else {
-                            then_keyword = not_provided(parser);
-                        }
-                    } else {
-                        expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
-                        then_keyword = parser->previous;
-                    }
-
-                    // Now we can actually parse the statements associated with
-                    // the in node.
-                    pm_statements_node_t *statements;
-                    if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
-                        statements = NULL;
-                    } else {
-                        statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
-                    }
-
-                    // Now that we have the full pattern and statements, we can
-                    // create the node and attach it to the case node.
-                    pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
-                    pm_case_match_node_condition_append(case_node, condition);
-                }
-
-                // If we didn't parse any conditions (in or when) then we need
-                // to indicate that we have an error.
-                if (case_node->conditions.size == 0) {
-                    pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
-                }
-
-                node = (pm_node_t *) case_node;
-            }
-
-            accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
-            if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
-                pm_token_t else_keyword = parser->previous;
-                pm_else_node_t *else_node;
-
-                if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
-                    else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
-                } else {
-                    else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
-                }
-
-                if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
-                    pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
-                } else {
-                    pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
-                }
-            }
-
-            parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
-            expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
-
-            if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
-                pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
-            } else {
-                pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
+                    return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name));
             }
-
-            pop_block_exits(parser, previous_block_exits);
-            pm_node_list_free(&current_block_exits);
-
-            return node;
         }
+        case PM_TOKEN_KEYWORD_CASE:
+            return parse_case(parser, flags, depth);
         case PM_TOKEN_KEYWORD_BEGIN: {
             size_t opening_newline_index = token_newline_index(parser);
             parser_lex(parser);
@@ -19072,15 +19638,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
 
             pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
             parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
-            expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
-
-            begin_node->base.location.end = parser->previous.end;
-            pm_begin_node_end_keyword_set(begin_node, &parser->previous);
+            expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword);
 
+            PM_NODE_LENGTH_SET_TOKEN(parser, begin_node, &parser->previous);
+            pm_begin_node_end_keyword_set(parser, begin_node, &parser->previous);
             pop_block_exits(parser, previous_block_exits);
-            pm_node_list_free(&current_block_exits);
-
-            return (pm_node_t *) begin_node;
+            return UP(begin_node);
         }
         case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
             pm_node_list_t current_block_exits = { 0 };
@@ -19097,16 +19660,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             pm_token_t opening = parser->previous;
             pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
 
-            expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
+            expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM, &opening);
             pm_context_t context = parser->current_context->context;
             if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
                 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
             }
 
             flush_block_exits(parser, previous_block_exits);
-            pm_node_list_free(&current_block_exits);
-
-            return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
+            return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
         }
         case PM_TOKEN_KEYWORD_BREAK:
         case PM_TOKEN_KEYWORD_NEXT:
@@ -19123,29 +19684,44 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
 
                 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
-                    parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
+                    pm_token_t next = parser->current;
+                    parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, flags, (uint16_t) (depth + 1));
+
+                    // Reject `foo && return bar`.
+                    if (!(flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && arguments.arguments != NULL) {
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, &next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(next.type));
+                    }
+                }
+
+                // It's possible that we've parsed a block argument through our
+                // call to parse_arguments. If we found one, we should mark it
+                // as invalid and destroy it, as we don't have a place for it.
+                if (arguments.block != NULL) {
+                    pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
+                    pm_node_unreference(parser, arguments.block);
+                    arguments.block = NULL;
                 }
             }
 
             switch (keyword.type) {
                 case PM_TOKEN_KEYWORD_BREAK: {
-                    pm_node_t *node = (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments);
+                    pm_node_t *node = UP(pm_break_node_create(parser, &keyword, arguments.arguments));
                     if (!parser->partial_script) parse_block_exit(parser, node);
                     return node;
                 }
                 case PM_TOKEN_KEYWORD_NEXT: {
-                    pm_node_t *node = (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments);
+                    pm_node_t *node = UP(pm_next_node_create(parser, &keyword, arguments.arguments));
                     if (!parser->partial_script) parse_block_exit(parser, node);
                     return node;
                 }
                 case PM_TOKEN_KEYWORD_RETURN: {
-                    pm_node_t *node = (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
+                    pm_node_t *node = UP(pm_return_node_create(parser, &keyword, arguments.arguments));
                     parse_return(parser, node);
                     return node;
                 }
                 default:
                     assert(false && "unreachable");
-                    return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+                    return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
             }
         }
         case PM_TOKEN_KEYWORD_SUPER: {
@@ -19153,24 +19729,24 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
 
             pm_token_t keyword = parser->previous;
             pm_arguments_t arguments = { 0 };
-            parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+            parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
 
             if (
-                arguments.opening_loc.start == NULL &&
+                arguments.opening_loc.length == 0 &&
                 arguments.arguments == NULL &&
                 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
             ) {
-                return (pm_node_t *) pm_forwarding_super_node_create(parser, &keyword, &arguments);
+                return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments));
             }
 
-            return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments);
+            return UP(pm_super_node_create(parser, &keyword, &arguments));
         }
         case PM_TOKEN_KEYWORD_YIELD: {
             parser_lex(parser);
 
             pm_token_t keyword = parser->previous;
             pm_arguments_t arguments = { 0 };
-            parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
+            parse_arguments_list(parser, &arguments, false, flags, (uint16_t) (depth + 1));
 
             // It's possible that we've parsed a block argument through our
             // call to parse_arguments_list. If we found one, we should mark it
@@ -19178,434 +19754,25 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             // yield node.
             if (arguments.block != NULL) {
                 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
-                pm_node_destroy(parser, arguments.block);
+                pm_node_unreference(parser, arguments.block);
                 arguments.block = NULL;
             }
 
-            pm_node_t *node = (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
+            pm_node_t *node = UP(pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc));
             if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
 
             return node;
         }
-        case PM_TOKEN_KEYWORD_CLASS: {
-            size_t opening_newline_index = token_newline_index(parser);
-            parser_lex(parser);
-
-            pm_token_t class_keyword = parser->previous;
-            pm_do_loop_stack_push(parser, false);
-
-            pm_node_list_t current_block_exits = { 0 };
-            pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
-            if (accept1(parser, PM_TOKEN_LESS_LESS)) {
-                pm_token_t operator = parser->previous;
-                pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
-
-                pm_parser_scope_push(parser, true);
-                if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
-                    PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
-                }
-
-                pm_node_t *statements = NULL;
-                if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
-                    pm_accepts_block_stack_push(parser, true);
-                    statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1));
-                    pm_accepts_block_stack_pop(parser);
-                }
-
-                if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
-                    assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
-                    statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1));
-                } else {
-                    parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
-                }
-
-                expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
-
-                pm_constant_id_list_t locals;
-                pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
-
-                pm_parser_scope_pop(parser);
-                pm_do_loop_stack_pop(parser);
-
-                flush_block_exits(parser, previous_block_exits);
-                pm_node_list_free(&current_block_exits);
-
-                return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
-            }
-
-            pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
-            pm_token_t name = parser->previous;
-            if (name.type != PM_TOKEN_CONSTANT) {
-                pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
-            }
-
-            pm_token_t inheritance_operator;
-            pm_node_t *superclass;
-
-            if (match1(parser, PM_TOKEN_LESS)) {
-                inheritance_operator = parser->current;
-                lex_state_set(parser, PM_LEX_STATE_BEG);
-
-                parser->command_start = true;
-                parser_lex(parser);
-
-                superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
-            } else {
-                inheritance_operator = not_provided(parser);
-                superclass = NULL;
-            }
-
-            pm_parser_scope_push(parser, true);
-
-            if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
-                expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
-            } else {
-                accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
-            }
-            pm_node_t *statements = NULL;
-
-            if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
-                pm_accepts_block_stack_push(parser, true);
-                statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1));
-                pm_accepts_block_stack_pop(parser);
-            }
-
-            if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
-                assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
-                statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1));
-            } else {
-                parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
-            }
-
-            expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
-
-            if (context_def_p(parser)) {
-                pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
-            }
-
-            pm_constant_id_list_t locals;
-            pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
-
-            pm_parser_scope_pop(parser);
-            pm_do_loop_stack_pop(parser);
-
-            if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
-                pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
-            }
-
-            pop_block_exits(parser, previous_block_exits);
-            pm_node_list_free(&current_block_exits);
-
-            return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
-        }
-        case PM_TOKEN_KEYWORD_DEF: {
-            pm_node_list_t current_block_exits = { 0 };
-            pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
-            pm_token_t def_keyword = parser->current;
-            size_t opening_newline_index = token_newline_index(parser);
-
-            pm_node_t *receiver = NULL;
-            pm_token_t operator = not_provided(parser);
-            pm_token_t name;
-
-            // This context is necessary for lexing `...` in a bare params
-            // correctly. It must be pushed before lexing the first param, so it
-            // is here.
-            context_push(parser, PM_CONTEXT_DEF_PARAMS);
-            parser_lex(parser);
-
-            // This will be false if the method name is not a valid identifier
-            // but could be followed by an operator.
-            bool valid_name = true;
-
-            switch (parser->current.type) {
-                case PM_CASE_OPERATOR:
-                    pm_parser_scope_push(parser, true);
-                    lex_state_set(parser, PM_LEX_STATE_ENDFN);
-                    parser_lex(parser);
-
-                    name = parser->previous;
-                    break;
-                case PM_TOKEN_IDENTIFIER: {
-                    parser_lex(parser);
-
-                    if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
-                        receiver = parse_variable_call(parser);
-
-                        pm_parser_scope_push(parser, true);
-                        lex_state_set(parser, PM_LEX_STATE_FNAME);
-                        parser_lex(parser);
-
-                        operator = parser->previous;
-                        name = parse_method_definition_name(parser);
-                    } else {
-                        pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
-                        pm_parser_scope_push(parser, true);
-
-                        name = parser->previous;
-                    }
-
-                    break;
-                }
-                case PM_TOKEN_INSTANCE_VARIABLE:
-                case PM_TOKEN_CLASS_VARIABLE:
-                case PM_TOKEN_GLOBAL_VARIABLE:
-                    valid_name = false;
-                    PRISM_FALLTHROUGH
-                case PM_TOKEN_CONSTANT:
-                case PM_TOKEN_KEYWORD_NIL:
-                case PM_TOKEN_KEYWORD_SELF:
-                case PM_TOKEN_KEYWORD_TRUE:
-                case PM_TOKEN_KEYWORD_FALSE:
-                case PM_TOKEN_KEYWORD___FILE__:
-                case PM_TOKEN_KEYWORD___LINE__:
-                case PM_TOKEN_KEYWORD___ENCODING__: {
-                    pm_parser_scope_push(parser, true);
-                    parser_lex(parser);
-
-                    pm_token_t identifier = parser->previous;
-
-                    if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
-                        lex_state_set(parser, PM_LEX_STATE_FNAME);
-                        parser_lex(parser);
-                        operator = parser->previous;
-
-                        switch (identifier.type) {
-                            case PM_TOKEN_CONSTANT:
-                                receiver = (pm_node_t *) pm_constant_read_node_create(parser, &identifier);
-                                break;
-                            case PM_TOKEN_INSTANCE_VARIABLE:
-                                receiver = (pm_node_t *) pm_instance_variable_read_node_create(parser, &identifier);
-                                break;
-                            case PM_TOKEN_CLASS_VARIABLE:
-                                receiver = (pm_node_t *) pm_class_variable_read_node_create(parser, &identifier);
-                                break;
-                            case PM_TOKEN_GLOBAL_VARIABLE:
-                                receiver = (pm_node_t *) pm_global_variable_read_node_create(parser, &identifier);
-                                break;
-                            case PM_TOKEN_KEYWORD_NIL:
-                                receiver = (pm_node_t *) pm_nil_node_create(parser, &identifier);
-                                break;
-                            case PM_TOKEN_KEYWORD_SELF:
-                                receiver = (pm_node_t *) pm_self_node_create(parser, &identifier);
-                                break;
-                            case PM_TOKEN_KEYWORD_TRUE:
-                                receiver = (pm_node_t *) pm_true_node_create(parser, &identifier);
-                                break;
-                            case PM_TOKEN_KEYWORD_FALSE:
-                                receiver = (pm_node_t *) pm_false_node_create(parser, &identifier);
-                                break;
-                            case PM_TOKEN_KEYWORD___FILE__:
-                                receiver = (pm_node_t *) pm_source_file_node_create(parser, &identifier);
-                                break;
-                            case PM_TOKEN_KEYWORD___LINE__:
-                                receiver = (pm_node_t *) pm_source_line_node_create(parser, &identifier);
-                                break;
-                            case PM_TOKEN_KEYWORD___ENCODING__:
-                                receiver = (pm_node_t *) pm_source_encoding_node_create(parser, &identifier);
-                                break;
-                            default:
-                                break;
-                        }
-
-                        name = parse_method_definition_name(parser);
-                    } else {
-                        if (!valid_name) {
-                            PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
-                        }
-
-                        name = identifier;
-                    }
-                    break;
-                }
-                case PM_TOKEN_PARENTHESIS_LEFT: {
-                    // The current context is `PM_CONTEXT_DEF_PARAMS`, however
-                    // the inner expression of this parenthesis should not be
-                    // processed under this context. Thus, the context is popped
-                    // here.
-                    context_pop(parser);
-                    parser_lex(parser);
-
-                    pm_token_t lparen = parser->previous;
-                    pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
-
-                    accept1(parser, PM_TOKEN_NEWLINE);
-                    expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
-                    pm_token_t rparen = parser->previous;
-
-                    lex_state_set(parser, PM_LEX_STATE_FNAME);
-                    expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
-
-                    operator = parser->previous;
-                    receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0);
-
-                    // To push `PM_CONTEXT_DEF_PARAMS` again is for the same
-                    // reason as described the above.
-                    pm_parser_scope_push(parser, true);
-                    context_push(parser, PM_CONTEXT_DEF_PARAMS);
-                    name = parse_method_definition_name(parser);
-                    break;
-                }
-                default:
-                    pm_parser_scope_push(parser, true);
-                    name = parse_method_definition_name(parser);
-                    break;
-            }
-
-            pm_token_t lparen;
-            pm_token_t rparen;
-            pm_parameters_node_t *params;
-
-            switch (parser->current.type) {
-                case PM_TOKEN_PARENTHESIS_LEFT: {
-                    parser_lex(parser);
-                    lparen = parser->previous;
-
-                    if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
-                        params = NULL;
-                    } else {
-                        params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
-                    }
-
-                    lex_state_set(parser, PM_LEX_STATE_BEG);
-                    parser->command_start = true;
-
-                    context_pop(parser);
-                    if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
-                        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
-                        parser->previous.start = parser->previous.end;
-                        parser->previous.type = PM_TOKEN_MISSING;
-                    }
-
-                    rparen = parser->previous;
-                    break;
-                }
-                case PM_CASE_PARAMETER: {
-                    // If we're about to lex a label, we need to add the label
-                    // state to make sure the next newline is ignored.
-                    if (parser->current.type == PM_TOKEN_LABEL) {
-                        lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
-                    }
-
-                    lparen = not_provided(parser);
-                    rparen = not_provided(parser);
-                    params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
-
-                    context_pop(parser);
-                    break;
-                }
-                default: {
-                    lparen = not_provided(parser);
-                    rparen = not_provided(parser);
-                    params = NULL;
-
-                    context_pop(parser);
-                    break;
-                }
-            }
-
-            pm_node_t *statements = NULL;
-            pm_token_t equal;
-            pm_token_t end_keyword;
-
-            if (accept1(parser, PM_TOKEN_EQUAL)) {
-                if (token_is_setter_name(&name)) {
-                    pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
-                }
-                equal = parser->previous;
-
-                context_push(parser, PM_CONTEXT_DEF);
-                pm_do_loop_stack_push(parser, false);
-                statements = (pm_node_t *) pm_statements_node_create(parser);
-
-                pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, binding_power < PM_BINDING_POWER_COMPOSITION, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
-
-                if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
-                    context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
-
-                    pm_token_t rescue_keyword = parser->previous;
-                    pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
-                    context_pop(parser);
-
-                    statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
-                }
-
-                pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
-                pm_do_loop_stack_pop(parser);
-                context_pop(parser);
-                end_keyword = not_provided(parser);
-            } else {
-                equal = not_provided(parser);
-
-                if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
-                    lex_state_set(parser, PM_LEX_STATE_BEG);
-                    parser->command_start = true;
-                    expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
-                } else {
-                    accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
-                }
-
-                pm_accepts_block_stack_push(parser, true);
-                pm_do_loop_stack_push(parser, false);
-
-                if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
-                    pm_accepts_block_stack_push(parser, true);
-                    statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1));
-                    pm_accepts_block_stack_pop(parser);
-                }
-
-                if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
-                    assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
-                    statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1));
-                } else {
-                    parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
-                }
-
-                pm_accepts_block_stack_pop(parser);
-                pm_do_loop_stack_pop(parser);
-
-                expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
-                end_keyword = parser->previous;
-            }
-
-            pm_constant_id_list_t locals;
-            pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
-            pm_parser_scope_pop(parser);
-
-            /**
-             * If the final character is @. As is the case when defining
-             * methods to override the unary operators, we should ignore
-             * the @ in the same way we do for symbols.
-             */
-            pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
-
-            flush_block_exits(parser, previous_block_exits);
-            pm_node_list_free(&current_block_exits);
-
-            return (pm_node_t *) pm_def_node_create(
-                parser,
-                name_id,
-                &name,
-                receiver,
-                params,
-                statements,
-                &locals,
-                &def_keyword,
-                &operator,
-                &lparen,
-                &rparen,
-                &equal,
-                &end_keyword
-            );
-        }
+        case PM_TOKEN_KEYWORD_CLASS:
+            return parse_class(parser, flags, depth);
+        case PM_TOKEN_KEYWORD_DEF:
+            return parse_def(parser, binding_power, flags, depth);
         case PM_TOKEN_KEYWORD_DEFINED: {
             parser_lex(parser);
-            pm_token_t keyword = parser->previous;
 
-            pm_token_t lparen;
-            pm_token_t rparen;
+            pm_token_t keyword = parser->previous;
+            pm_token_t lparen = { 0 };
+            pm_token_t rparen = { 0 };
             pm_node_t *expression;
 
             context_push(parser, PM_CONTEXT_DEFINED);
@@ -19615,34 +19782,29 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 lparen = parser->previous;
 
                 if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
-                    expression = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
-                    lparen = not_provided(parser);
-                    rparen = not_provided(parser);
+                    expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
+                    lparen = (pm_token_t) { 0 };
                 } else {
-                    expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
+                    expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
 
-                    if (parser->recovering) {
-                        rparen = not_provided(parser);
-                    } else {
+                    if (!parser->recovering) {
                         accept1(parser, PM_TOKEN_NEWLINE);
                         expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
                         rparen = parser->previous;
                     }
                 }
             } else {
-                lparen = not_provided(parser);
-                rparen = not_provided(parser);
-                expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
+                expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
             }
 
             context_pop(parser);
-            return (pm_node_t *) pm_defined_node_create(
+            return UP(pm_defined_node_create(
                 parser,
-                &lparen,
+                NTOK2PTR(lparen),
                 expression,
-                &rparen,
-                &PM_LOCATION_TOKEN_VALUE(&keyword)
-            );
+                NTOK2PTR(rparen),
+                &keyword
+            ));
         }
         case PM_TOKEN_KEYWORD_END_UPCASE: {
             if (binding_power != PM_BINDING_POWER_STATEMENT) {
@@ -19660,12 +19822,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             pm_token_t opening = parser->previous;
             pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
 
-            expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
-            return (pm_node_t *) pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
+            expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM, &opening);
+            return UP(pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
         }
         case PM_TOKEN_KEYWORD_FALSE:
             parser_lex(parser);
-            return (pm_node_t *) pm_false_node_create(parser, &parser->previous);
+            return UP(pm_false_node_create(parser, &parser->previous));
         case PM_TOKEN_KEYWORD_FOR: {
             size_t opening_newline_index = token_newline_index(parser);
             parser_lex(parser);
@@ -19681,15 +19843,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 pm_node_t *name = NULL;
 
                 if (token_begins_expression_p(parser->current.type)) {
-                    name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+                    name = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
                 }
 
-                index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
+                index = UP(pm_splat_node_create(parser, &star_operator, name));
             } else if (token_begins_expression_p(parser->current.type)) {
-                index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
+                index = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
             } else {
                 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
-                index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
+                index = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &for_keyword), PM_TOKEN_LENGTH(&for_keyword)));
             }
 
             // Now, if there are multiple index expressions, parse them out.
@@ -19705,16 +19867,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
             pm_token_t in_keyword = parser->previous;
 
-            pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
+            pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
             pm_do_loop_stack_pop(parser);
 
-            pm_token_t do_keyword;
+            pm_token_t do_keyword = { 0 };
             if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
                 do_keyword = parser->previous;
             } else {
-                do_keyword = not_provided(parser);
                 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
-                    PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
+                    PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_str(parser->current.type));
                 }
             }
 
@@ -19724,13 +19885,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             }
 
             parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
-            expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
+            expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword);
 
-            return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
+            return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, NTOK2PTR(do_keyword), &parser->previous));
         }
         case PM_TOKEN_KEYWORD_IF:
             if (parser_end_of_line_p(parser)) {
-                PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
+                PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL);
             }
 
             size_t opening_newline_index = token_newline_index(parser);
@@ -19747,26 +19908,24 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
             pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
 
-            if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
-                pm_node_destroy(parser, name);
+            if (PM_NODE_TYPE_P(name, PM_ERROR_RECOVERY_NODE)) {
             } else {
-                pm_undef_node_append(undef, name);
+                pm_undef_node_append(parser->arena, undef, name);
 
                 while (match1(parser, PM_TOKEN_COMMA)) {
                     lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
                     parser_lex(parser);
                     name = parse_undef_argument(parser, (uint16_t) (depth + 1));
 
-                    if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
-                        pm_node_destroy(parser, name);
+                    if (PM_NODE_TYPE_P(name, PM_ERROR_RECOVERY_NODE)) {
                         break;
                     }
 
-                    pm_undef_node_append(undef, name);
+                    pm_undef_node_append(parser->arena, undef, name);
                 }
             }
 
-            return (pm_node_t *) undef;
+            return UP(undef);
         }
         case PM_TOKEN_KEYWORD_NOT: {
             parser_lex(parser);
@@ -19775,28 +19934,46 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             pm_arguments_t arguments = { 0 };
             pm_node_t *receiver = NULL;
 
+            // The `not` keyword without parentheses is only valid in contexts
+            // where it would be parsed as an expression (i.e., at or below
+            // the `not` binding power level). In other contexts (e.g., method
+            // arguments, array elements, assignment right-hand sides),
+            // parentheses are required: `not(x)`. An exception is made for
+            // endless def bodies, where `not` is valid as both `arg` and
+            // `command` (e.g., `def f = not 1`, `def f = not foo bar`).
+            if (binding_power > PM_BINDING_POWER_NOT && !(flags & PM_PARSE_IN_ENDLESS_DEF) && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
+                if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
+                    pm_parser_err(parser, PM_TOKEN_END(parser, &parser->previous), 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
+                } else {
+                    accept1(parser, PM_TOKEN_NEWLINE);
+                    pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
+                }
+
+                return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
+            }
+
             accept1(parser, PM_TOKEN_NEWLINE);
 
             if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
                 pm_token_t lparen = parser->previous;
 
                 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
-                    receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
+                    receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
                 } else {
-                    arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
-                    receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
+                    arguments.opening_loc = TOK2LOC(parser, &lparen);
+                    receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
 
                     if (!parser->recovering) {
                         accept1(parser, PM_TOKEN_NEWLINE);
                         expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
-                        arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+                        arguments.closing_loc = TOK2LOC(parser, &parser->previous);
                     }
                 }
             } else {
-                receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
+                receiver = parse_expression(parser, PM_BINDING_POWER_NOT, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
             }
 
-            return (pm_node_t *) pm_call_node_not_create(parser, receiver, &message, &arguments);
+            return UP(pm_call_node_not_create(parser, receiver, &message, &arguments));
         }
         case PM_TOKEN_KEYWORD_UNLESS: {
             size_t opening_newline_index = token_newline_index(parser);
@@ -19804,81 +19981,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
 
             return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
         }
-        case PM_TOKEN_KEYWORD_MODULE: {
-            pm_node_list_t current_block_exits = { 0 };
-            pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
-            size_t opening_newline_index = token_newline_index(parser);
-            parser_lex(parser);
-            pm_token_t module_keyword = parser->previous;
-
-            pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
-            pm_token_t name;
-
-            // If we can recover from a syntax error that occurred while parsing
-            // the name of the module, then we'll handle that here.
-            if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
-                pop_block_exits(parser, previous_block_exits);
-                pm_node_list_free(&current_block_exits);
-
-                pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
-                return (pm_node_t *) pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
-            }
-
-            while (accept1(parser, PM_TOKEN_COLON_COLON)) {
-                pm_token_t double_colon = parser->previous;
-
-                expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
-                constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
-            }
-
-            // Here we retrieve the name of the module. If it wasn't a constant,
-            // then it's possible that `module foo` was passed, which is a
-            // syntax error. We handle that here as well.
-            name = parser->previous;
-            if (name.type != PM_TOKEN_CONSTANT) {
-                pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
-            }
-
-            pm_parser_scope_push(parser, true);
-            accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
-            pm_node_t *statements = NULL;
-
-            if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
-                pm_accepts_block_stack_push(parser, true);
-                statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1));
-                pm_accepts_block_stack_pop(parser);
-            }
-
-            if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
-                assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
-                statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1));
-            } else {
-                parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
-            }
-
-            pm_constant_id_list_t locals;
-            pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
-
-            pm_parser_scope_pop(parser);
-            expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
-
-            if (context_def_p(parser)) {
-                pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
-            }
-
-            pop_block_exits(parser, previous_block_exits);
-            pm_node_list_free(&current_block_exits);
-
-            return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
-        }
+        case PM_TOKEN_KEYWORD_MODULE:
+            return parse_module(parser, flags, depth);
         case PM_TOKEN_KEYWORD_NIL:
             parser_lex(parser);
-            return (pm_node_t *) pm_nil_node_create(parser, &parser->previous);
+            return UP(pm_nil_node_create(parser, &parser->previous));
         case PM_TOKEN_KEYWORD_REDO: {
             parser_lex(parser);
 
-            pm_node_t *node = (pm_node_t *) pm_redo_node_create(parser, &parser->previous);
+            pm_node_t *node = UP(pm_redo_node_create(parser, &parser->previous));
             if (!parser->partial_script) parse_block_exit(parser, node);
 
             return node;
@@ -19886,17 +19997,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
         case PM_TOKEN_KEYWORD_RETRY: {
             parser_lex(parser);
 
-            pm_node_t *node = (pm_node_t *) pm_retry_node_create(parser, &parser->previous);
+            pm_node_t *node = UP(pm_retry_node_create(parser, &parser->previous));
             parse_retry(parser, node);
 
             return node;
         }
         case PM_TOKEN_KEYWORD_SELF:
             parser_lex(parser);
-            return (pm_node_t *) pm_self_node_create(parser, &parser->previous);
+            return UP(pm_self_node_create(parser, &parser->previous));
         case PM_TOKEN_KEYWORD_TRUE:
             parser_lex(parser);
-            return (pm_node_t *) pm_true_node_create(parser, &parser->previous);
+            return UP(pm_true_node_create(parser, &parser->previous));
         case PM_TOKEN_KEYWORD_UNTIL: {
             size_t opening_newline_index = token_newline_index(parser);
 
@@ -19905,16 +20016,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
 
             parser_lex(parser);
             pm_token_t keyword = parser->previous;
-            pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
+            pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
 
             pm_do_loop_stack_pop(parser);
             context_pop(parser);
 
-            pm_token_t do_keyword;
+            pm_token_t do_keyword = { 0 };
             if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
                 do_keyword = parser->previous;
             } else {
-                do_keyword = not_provided(parser);
                 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
             }
 
@@ -19927,9 +20037,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             }
 
             parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
-            expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
+            expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM, &keyword);
 
-            return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
+            return UP(pm_until_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0));
         }
         case PM_TOKEN_KEYWORD_WHILE: {
             size_t opening_newline_index = token_newline_index(parser);
@@ -19939,16 +20049,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
 
             parser_lex(parser);
             pm_token_t keyword = parser->previous;
-            pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
+            pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
 
             pm_do_loop_stack_pop(parser);
             context_pop(parser);
 
-            pm_token_t do_keyword;
+            pm_token_t do_keyword = { 0 };
             if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
                 do_keyword = parser->previous;
             } else {
-                do_keyword = not_provided(parser);
                 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
             }
 
@@ -19961,381 +20070,122 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             }
 
             parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
-            expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
+            expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM, &keyword);
 
-            return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
+            return UP(pm_while_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0));
         }
         case PM_TOKEN_PERCENT_LOWER_I: {
             parser_lex(parser);
             pm_token_t opening = parser->previous;
             pm_array_node_t *array = pm_array_node_create(parser, &opening);
+            pm_node_t *current = NULL;
 
             while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
                 accept1(parser, PM_TOKEN_WORDS_SEP);
                 if (match1(parser, PM_TOKEN_STRING_END)) break;
 
-                if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
-                    pm_token_t opening = not_provided(parser);
-                    pm_token_t closing = not_provided(parser);
-                    pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
-                }
-
-                expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
-            }
-
-            pm_token_t closing = parser->current;
-            if (match1(parser, PM_TOKEN_EOF)) {
-                pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
-                closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
-            } else {
-                expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
-            }
-            pm_array_node_close_set(array, &closing);
-
-            return (pm_node_t *) array;
-        }
-        case PM_TOKEN_PERCENT_UPPER_I: {
-            parser_lex(parser);
-            pm_token_t opening = parser->previous;
-            pm_array_node_t *array = pm_array_node_create(parser, &opening);
-
-            // This is the current node that we are parsing that will be added to the
-            // list of elements.
-            pm_node_t *current = NULL;
-
-            while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
-                switch (parser->current.type) {
-                    case PM_TOKEN_WORDS_SEP: {
-                        if (current == NULL) {
-                            // If we hit a separator before we have any content, then we don't
-                            // need to do anything.
-                        } else {
-                            // If we hit a separator after we've hit content, then we need to
-                            // append that content to the list and reset the current node.
-                            pm_array_node_elements_append(array, current);
-                            current = NULL;
-                        }
-
+                // Interpolation is not possible but nested heredocs can still lead to
+                // consecutive (disjoint) string tokens when the final newline is escaped.
+                while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+                    // Record the string node, moving to interpolation if needed.
+                    if (current == NULL) {
+                        current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
+                        parser_lex(parser);
+                    } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
+                        pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+                        parser_lex(parser);
+                        pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string);
+                    } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+                        pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
+                        pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start + cast->value_loc.start, .end = parser->start + cast->value_loc.start + cast->value_loc.length };
+                        pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
+                        pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL));
                         parser_lex(parser);
-                        break;
-                    }
-                    case PM_TOKEN_STRING_CONTENT: {
-                        pm_token_t opening = not_provided(parser);
-                        pm_token_t closing = not_provided(parser);
-
-                        if (current == NULL) {
-                            // If we hit content and the current node is NULL, then this is
-                            // the first string content we've seen. In that case we're going
-                            // to create a new string node and set that to the current.
-                            current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
-                            parser_lex(parser);
-                        } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
-                            // If we hit string content and the current node is an
-                            // interpolated string, then we need to append the string content
-                            // to the list of child nodes.
-                            pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
-                            parser_lex(parser);
-
-                            pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
-                        } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
-                            // If we hit string content and the current node is a symbol node,
-                            // then we need to convert the current node into an interpolated
-                            // string and add the string content to the list of child nodes.
-                            pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
-                            pm_token_t bounds = not_provided(parser);
-
-                            pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
-                            pm_node_t *first_string = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped);
-                            pm_node_t *second_string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
-                            parser_lex(parser);
-
-                            pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
-                            pm_interpolated_symbol_node_append(interpolated, first_string);
-                            pm_interpolated_symbol_node_append(interpolated, second_string);
-
-                            xfree(current);
-                            current = (pm_node_t *) interpolated;
-                        } else {
-                            assert(false && "unreachable");
-                        }
-
-                        break;
-                    }
-                    case PM_TOKEN_EMBVAR: {
-                        bool start_location_set = false;
-                        if (current == NULL) {
-                            // If we hit an embedded variable and the current node is NULL,
-                            // then this is the start of a new string. We'll set the current
-                            // node to a new interpolated string.
-                            pm_token_t opening = not_provided(parser);
-                            pm_token_t closing = not_provided(parser);
-                            current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
-                        } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
-                            // If we hit an embedded variable and the current node is a string
-                            // node, then we'll convert the current into an interpolated
-                            // string and add the string node to the list of parts.
-                            pm_token_t opening = not_provided(parser);
-                            pm_token_t closing = not_provided(parser);
-                            pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
-
-                            current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
-                            pm_interpolated_symbol_node_append(interpolated, current);
-                            interpolated->base.location.start = current->location.start;
-                            start_location_set = true;
-                            current = (pm_node_t *) interpolated;
-                        } else {
-                            // If we hit an embedded variable and the current node is an
-                            // interpolated string, then we'll just add the embedded variable.
-                        }
 
-                        pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
-                        pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
-                        if (!start_location_set) {
-                            current->location.start = part->location.start;
-                        }
-                        break;
-                    }
-                    case PM_TOKEN_EMBEXPR_BEGIN: {
-                        bool start_location_set = false;
-                        if (current == NULL) {
-                            // If we hit an embedded expression and the current node is NULL,
-                            // then this is the start of a new string. We'll set the current
-                            // node to a new interpolated string.
-                            pm_token_t opening = not_provided(parser);
-                            pm_token_t closing = not_provided(parser);
-                            current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
-                        } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
-                            // If we hit an embedded expression and the current node is a
-                            // string node, then we'll convert the current into an
-                            // interpolated string and add the string node to the list of
-                            // parts.
-                            pm_token_t opening = not_provided(parser);
-                            pm_token_t closing = not_provided(parser);
-                            pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
-
-                            current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
-                            pm_interpolated_symbol_node_append(interpolated, current);
-                            interpolated->base.location.start = current->location.start;
-                            start_location_set = true;
-                            current = (pm_node_t *) interpolated;
-                        } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
-                            // If we hit an embedded expression and the current node is an
-                            // interpolated string, then we'll just continue on.
-                        } else {
-                            assert(false && "unreachable");
-                        }
+                        pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
+                        pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string);
+                        pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string);
 
-                        pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
-                        pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
-                        if (!start_location_set) {
-                            current->location.start = part->location.start;
-                        }
-                        break;
+                        // current is arena-allocated so no explicit free is needed.
+                        current = UP(interpolated);
+                    } else {
+                        assert(false && "unreachable");
                     }
-                    default:
-                        expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
-                        parser_lex(parser);
-                        break;
                 }
-            }
 
-            // If we have a current node, then we need to append it to the list.
-            if (current) {
-                pm_array_node_elements_append(array, current);
+                if (current) {
+                    pm_array_node_elements_append(parser->arena, array, current);
+                    current = NULL;
+                } else {
+                    expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
+                }
             }
 
             pm_token_t closing = parser->current;
             if (match1(parser, PM_TOKEN_EOF)) {
-                pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
-                closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+                pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
+                closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
             } else {
-                expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
+                expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
             }
-            pm_array_node_close_set(array, &closing);
+            pm_array_node_close_set(parser, array, &closing);
 
-            return (pm_node_t *) array;
+            return UP(array);
         }
+        case PM_TOKEN_PERCENT_UPPER_I:
+            return parse_symbol_array(parser, depth);
         case PM_TOKEN_PERCENT_LOWER_W: {
             parser_lex(parser);
             pm_token_t opening = parser->previous;
             pm_array_node_t *array = pm_array_node_create(parser, &opening);
-
-            // skip all leading whitespaces
-            accept1(parser, PM_TOKEN_WORDS_SEP);
+            pm_node_t *current = NULL;
 
             while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
                 accept1(parser, PM_TOKEN_WORDS_SEP);
                 if (match1(parser, PM_TOKEN_STRING_END)) break;
 
-                if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
-                    pm_token_t opening = not_provided(parser);
-                    pm_token_t closing = not_provided(parser);
-
-                    pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
-                    pm_array_node_elements_append(array, string);
+                // Interpolation is not possible but nested heredocs can still lead to
+                // consecutive (disjoint) string tokens when the final newline is escaped.
+                while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+                    pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+
+                    // Record the string node, moving to interpolation if needed.
+                    if (current == NULL) {
+                        current = string;
+                    } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+                        pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
+                    } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+                        pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
+                        pm_interpolated_string_node_append(parser, interpolated, current);
+                        pm_interpolated_string_node_append(parser, interpolated, string);
+                        current = UP(interpolated);
+                    } else {
+                        assert(false && "unreachable");
+                    }
+                    parser_lex(parser);
                 }
 
-                expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
+                if (current) {
+                    pm_array_node_elements_append(parser->arena, array, current);
+                    current = NULL;
+                } else {
+                    expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
+                }
             }
 
             pm_token_t closing = parser->current;
             if (match1(parser, PM_TOKEN_EOF)) {
                 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
-                closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+                closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
             } else {
                 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
             }
 
-            pm_array_node_close_set(array, &closing);
-            return (pm_node_t *) array;
-        }
-        case PM_TOKEN_PERCENT_UPPER_W: {
-            parser_lex(parser);
-            pm_token_t opening = parser->previous;
-            pm_array_node_t *array = pm_array_node_create(parser, &opening);
-
-            // This is the current node that we are parsing that will be added
-            // to the list of elements.
-            pm_node_t *current = NULL;
-
-            while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
-                switch (parser->current.type) {
-                    case PM_TOKEN_WORDS_SEP: {
-                        // Reset the explicit encoding if we hit a separator
-                        // since each element can have its own encoding.
-                        parser->explicit_encoding = NULL;
-
-                        if (current == NULL) {
-                            // If we hit a separator before we have any content,
-                            // then we don't need to do anything.
-                        } else {
-                            // If we hit a separator after we've hit content,
-                            // then we need to append that content to the list
-                            // and reset the current node.
-                            pm_array_node_elements_append(array, current);
-                            current = NULL;
-                        }
-
-                        parser_lex(parser);
-                        break;
-                    }
-                    case PM_TOKEN_STRING_CONTENT: {
-                        pm_token_t opening = not_provided(parser);
-                        pm_token_t closing = not_provided(parser);
-
-                        pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
-                        pm_node_flag_set(string, parse_unescaped_encoding(parser));
-                        parser_lex(parser);
-
-                        if (current == NULL) {
-                            // If we hit content and the current node is NULL,
-                            // then this is the first string content we've seen.
-                            // In that case we're going to create a new string
-                            // node and set that to the current.
-                            current = string;
-                        } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
-                            // If we hit string content and the current node is
-                            // an interpolated string, then we need to append
-                            // the string content to the list of child nodes.
-                            pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
-                        } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
-                            // If we hit string content and the current node is
-                            // a string node, then we need to convert the
-                            // current node into an interpolated string and add
-                            // the string content to the list of child nodes.
-                            pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
-                            pm_interpolated_string_node_append(interpolated, current);
-                            pm_interpolated_string_node_append(interpolated, string);
-                            current = (pm_node_t *) interpolated;
-                        } else {
-                            assert(false && "unreachable");
-                        }
-
-                        break;
-                    }
-                    case PM_TOKEN_EMBVAR: {
-                        if (current == NULL) {
-                            // If we hit an embedded variable and the current
-                            // node is NULL, then this is the start of a new
-                            // string. We'll set the current node to a new
-                            // interpolated string.
-                            pm_token_t opening = not_provided(parser);
-                            pm_token_t closing = not_provided(parser);
-                            current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
-                        } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
-                            // If we hit an embedded variable and the current
-                            // node is a string node, then we'll convert the
-                            // current into an interpolated string and add the
-                            // string node to the list of parts.
-                            pm_token_t opening = not_provided(parser);
-                            pm_token_t closing = not_provided(parser);
-                            pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
-                            pm_interpolated_string_node_append(interpolated, current);
-                            current = (pm_node_t *) interpolated;
-                        } else {
-                            // If we hit an embedded variable and the current
-                            // node is an interpolated string, then we'll just
-                            // add the embedded variable.
-                        }
-
-                        pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
-                        pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
-                        break;
-                    }
-                    case PM_TOKEN_EMBEXPR_BEGIN: {
-                        if (current == NULL) {
-                            // If we hit an embedded expression and the current
-                            // node is NULL, then this is the start of a new
-                            // string. We'll set the current node to a new
-                            // interpolated string.
-                            pm_token_t opening = not_provided(parser);
-                            pm_token_t closing = not_provided(parser);
-                            current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
-                        } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
-                            // If we hit an embedded expression and the current
-                            // node is a string node, then we'll convert the
-                            // current into an interpolated string and add the
-                            // string node to the list of parts.
-                            pm_token_t opening = not_provided(parser);
-                            pm_token_t closing = not_provided(parser);
-                            pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
-                            pm_interpolated_string_node_append(interpolated, current);
-                            current = (pm_node_t *) interpolated;
-                        } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
-                            // If we hit an embedded expression and the current
-                            // node is an interpolated string, then we'll just
-                            // continue on.
-                        } else {
-                            assert(false && "unreachable");
-                        }
-
-                        pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
-                        pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
-                        break;
-                    }
-                    default:
-                        expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
-                        parser_lex(parser);
-                        break;
-                }
-            }
-
-            // If we have a current node, then we need to append it to the list.
-            if (current) {
-                pm_array_node_elements_append(array, current);
-            }
-
-            pm_token_t closing = parser->current;
-            if (match1(parser, PM_TOKEN_EOF)) {
-                pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
-                closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
-            } else {
-                expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
-            }
-
-            pm_array_node_close_set(array, &closing);
-            return (pm_node_t *) array;
+            pm_array_node_close_set(parser, array, &closing);
+            return UP(array);
         }
+        case PM_TOKEN_PERCENT_UPPER_W:
+            return parse_string_array(parser, depth);
         case PM_TOKEN_REGEXP_BEGIN: {
             pm_token_t opening = parser->current;
             parser_lex(parser);
@@ -20352,10 +20202,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
 
                 parser_lex(parser);
 
-                pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
-                pm_node_flag_set(node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
-
-                return node;
+                pm_regular_expression_node_t *node = pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
+                pm_node_flag_set(UP(node), pm_regexp_parse(parser, node, NULL, NULL));
+                return UP(node);
             }
 
             pm_interpolated_regular_expression_node_t *interpolated;
@@ -20367,7 +20216,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 // regular expression) or if it's not then it has interpolation.
                 pm_string_t unescaped = parser->current_string;
                 pm_token_t content = parser->current;
-                bool ascii_only = parser->current_regular_expression_ascii_only;
                 parser_lex(parser);
 
                 // If we hit an end, then we can create a regular expression
@@ -20376,26 +20224,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
                     pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
 
-                    // If we're not immediately followed by a =~, then we want
-                    // to parse all of the errors at this point. If it is
-                    // followed by a =~, then it will get parsed higher up while
-                    // parsing the named captures as well.
+                    // If we're not immediately followed by a =~, then we
+                    // parse and validate now. If it is followed by a =~,
+                    // then it will get parsed in the =~ handler where
+                    // named captures can also be extracted.
                     if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
-                        parse_regular_expression_errors(parser, node);
+                        pm_node_flag_set(UP(node), pm_regexp_parse(parser, node, NULL, NULL));
                     }
 
-                    pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
-                    return (pm_node_t *) node;
+                    return UP(node);
                 }
 
                 // If we get here, then we have interpolation so we'll need to create
                 // a regular expression node with interpolation.
                 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
 
-                pm_token_t opening = not_provided(parser);
-                pm_token_t closing = not_provided(parser);
-                pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
-
+                pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
                 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
                     // This is extremely strange, but the first string part of a
                     // regular expression will always be tagged as binary if we
@@ -20403,7 +20247,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                     pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
                 }
 
-                pm_interpolated_regular_expression_node_append(interpolated, part);
+                pm_interpolated_regular_expression_node_append(parser->arena, interpolated, part);
             } else {
                 // If the first part of the body of the regular expression is not a
                 // string content, then we have interpolation and we need to create an
@@ -20416,20 +20260,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             pm_node_t *part;
             while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
                 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
-                    pm_interpolated_regular_expression_node_append(interpolated, part);
+                    pm_interpolated_regular_expression_node_append(parser->arena, interpolated, part);
                 }
             }
 
             pm_token_t closing = parser->current;
             if (match1(parser, PM_TOKEN_EOF)) {
                 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
-                closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+                closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
             } else {
                 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
             }
 
             pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
-            return (pm_node_t *) interpolated;
+            return UP(interpolated);
         }
         case PM_TOKEN_BACKTICK:
         case PM_TOKEN_PERCENT_LOWER_X: {
@@ -20451,7 +20295,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 };
 
                 parser_lex(parser);
-                return (pm_node_t *) pm_xstring_node_create(parser, &opening, &content, &parser->previous);
+                return UP(pm_xstring_node_create(parser, &opening, &content, &parser->previous));
             }
 
             pm_interpolated_x_string_node_t *node;
@@ -20466,7 +20310,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 parser_lex(parser);
 
                 if (match1(parser, PM_TOKEN_STRING_END)) {
-                    pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
+                    pm_node_t *node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
                     pm_node_flag_set(node, parse_unescaped_encoding(parser));
                     parser_lex(parser);
                     return node;
@@ -20476,13 +20320,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 // create a string node with interpolation.
                 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
 
-                pm_token_t opening = not_provided(parser);
-                pm_token_t closing = not_provided(parser);
-
-                pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
+                pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
                 pm_node_flag_set(part, parse_unescaped_encoding(parser));
 
-                pm_interpolated_xstring_node_append(node, part);
+                pm_interpolated_xstring_node_append(parser->arena, node, part);
             } else {
                 // If the first part of the body of the string is not a string
                 // content, then we have interpolation and we need to create an
@@ -20493,20 +20334,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             pm_node_t *part;
             while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
                 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
-                    pm_interpolated_xstring_node_append(node, part);
+                    pm_interpolated_xstring_node_append(parser->arena, node, part);
                 }
             }
 
             pm_token_t closing = parser->current;
             if (match1(parser, PM_TOKEN_EOF)) {
                 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
-                closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+                closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
             } else {
                 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
             }
-            pm_interpolated_xstring_node_closing_set(node, &closing);
+            pm_interpolated_xstring_node_closing_set(parser, node, &closing);
 
-            return (pm_node_t *) node;
+            return UP(node);
         }
         case PM_TOKEN_USTAR: {
             parser_lex(parser);
@@ -20516,17 +20357,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             // still lex past it though and create a missing node place.
             if (binding_power != PM_BINDING_POWER_STATEMENT) {
                 pm_parser_err_prefix(parser, diag_id);
-                return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+                return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
             }
 
             pm_token_t operator = parser->previous;
             pm_node_t *name = NULL;
 
             if (token_begins_expression_p(parser->current.type)) {
-                name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+                name = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
             }
 
-            pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
+            pm_node_t *splat = UP(pm_splat_node_create(parser, &operator, name));
 
             if (match1(parser, PM_TOKEN_COMMA)) {
                 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -20542,11 +20383,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             parser_lex(parser);
 
             pm_token_t operator = parser->previous;
-            pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+            pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (binding_power < PM_BINDING_POWER_MATCH ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
             pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
 
             pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
-            return (pm_node_t *) node;
+            return UP(node);
         }
         case PM_TOKEN_TILDE: {
             if (binding_power > PM_BINDING_POWER_UNARY) {
@@ -20555,10 +20396,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             parser_lex(parser);
 
             pm_token_t operator = parser->previous;
-            pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+            pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
             pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
 
-            return (pm_node_t *) node;
+            return UP(node);
         }
         case PM_TOKEN_UMINUS: {
             if (binding_power > PM_BINDING_POWER_UNARY) {
@@ -20567,22 +20408,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             parser_lex(parser);
 
             pm_token_t operator = parser->previous;
-            pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+            pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
             pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
 
-            return (pm_node_t *) node;
+            return UP(node);
         }
         case PM_TOKEN_UMINUS_NUM: {
             parser_lex(parser);
 
             pm_token_t operator = parser->previous;
-            pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+            pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
 
             if (accept1(parser, PM_TOKEN_STAR_STAR)) {
                 pm_token_t exponent_operator = parser->previous;
-                pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
-                node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0);
-                node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
+                pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
+                node = UP(pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0));
+                node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
             } else {
                 switch (PM_NODE_TYPE(node)) {
                     case PM_INTEGER_NODE:
@@ -20592,7 +20433,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                         parse_negative_numeric(node);
                         break;
                     default:
-                        node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
+                        node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
                         break;
                 }
             }
@@ -20626,13 +20467,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                     accept1(parser, PM_TOKEN_NEWLINE);
                     expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
 
-                    pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
+                    pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous);
                     break;
                 }
                 case PM_CASE_PARAMETER: {
                     pm_accepts_block_stack_push(parser, false);
-                    pm_token_t opening = not_provided(parser);
-                    block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
+                    block_parameters = parse_block_parameters(parser, false, NULL, true, false, (uint16_t) (depth + 1));
                     pm_accepts_block_stack_pop(parser);
                     break;
                 }
@@ -20650,39 +20490,37 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 opening = parser->previous;
 
                 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
-                    body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1));
+                    body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1)));
                 }
 
                 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
-                expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
+                expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE, &opening);
             } else {
                 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
                 opening = parser->previous;
 
                 if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
-                    pm_accepts_block_stack_push(parser, true);
-                    body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1));
-                    pm_accepts_block_stack_pop(parser);
+                    body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1)));
                 }
 
                 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
                     assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
-                    body = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1));
+                    body = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1)));
                 } else {
                     parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
                 }
 
-                expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
+                expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END, &operator);
             }
 
             pm_constant_id_list_t locals;
             pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
-            pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous);
+            pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &operator, &parser->previous);
 
             pm_parser_scope_pop(parser);
             pm_accepts_block_stack_pop(parser);
 
-            return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
+            return UP(pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body));
         }
         case PM_TOKEN_UPLUS: {
             if (binding_power > PM_BINDING_POWER_UNARY) {
@@ -20691,13 +20529,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             parser_lex(parser);
 
             pm_token_t operator = parser->previous;
-            pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+            pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
             pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
 
-            return (pm_node_t *) node;
+            return UP(node);
         }
         case PM_TOKEN_STRING_BEGIN:
-            return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
+            return parse_strings(parser, NULL, flags & PM_PARSE_ACCEPTS_LABEL, (uint16_t) (depth + 1));
         case PM_TOKEN_SYMBOL_BEGIN: {
             pm_lex_mode_t lex_mode = *parser->lex_modes.current;
             parser_lex(parser);
@@ -20720,17 +20558,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 // If we get here, then we are assuming this token is closing a
                 // parent context, so we'll indicate that to the user so that
                 // they know how we behaved.
-                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_str(parser->current.type), context_human(recoverable));
             } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
                 // We're going to make a special case here, because "cannot
                 // parse expression" is pretty generic, and we know here that we
                 // have an unexpected token.
-                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_str(parser->current.type));
             } else {
                 pm_parser_err_prefix(parser, diag_id);
             }
 
-            return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+            return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
         }
     }
 }
@@ -20745,8 +20583,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
  * or any of the binary operators that can be written to a variable.
  */
 static pm_node_t *
-parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
-    pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
+parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
+    pm_node_t *value = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) : (previous_binding_power < PM_BINDING_POWER_MATCH ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0))), diag_id, (uint16_t) (depth + 1));
+
+    // Assignments whose value is a command call (e.g., a = b c) can only
+    // be followed by modifiers (if/unless/while/until/rescue) and not by
+    // operators with higher binding power. If we find one, emit an error
+    // and skip the operator and its right-hand side.
+    if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER && (pm_command_call_value_p(value) || pm_block_call_p(value))) {
+        PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
+        parser_lex(parser);
+        parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+    }
 
     // Contradicting binding powers, the right-hand-side value of the assignment
     // allows the `rescue` modifier.
@@ -20756,10 +20604,10 @@ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_
         pm_token_t rescue = parser->current;
         parser_lex(parser);
 
-        pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+        pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
         context_pop(parser);
 
-        return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
+        return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
     }
 
     return value;
@@ -20814,35 +20662,46 @@ parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
  * operator that allows multiple values after it.
  */
 static pm_node_t *
-parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
+parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
     bool permitted = true;
     if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
 
-    pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id, (uint16_t) (depth + 1));
+    pm_node_t *value = parse_starred_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) : (previous_binding_power < PM_BINDING_POWER_MODIFIER ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0))), diag_id, (uint16_t) (depth + 1));
     if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
 
     parse_assignment_value_local(parser, value);
     bool single_value = true;
 
-    if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
+    // Block calls (command call + do block, e.g., `foo bar do end`) cannot
+    // be followed by a comma to form a multi-value RHS because each element
+    // of a multi-value assignment must be an `arg`, not a `block_call`.
+    if (previous_binding_power == PM_BINDING_POWER_STATEMENT && !pm_block_call_p(value) && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
         single_value = false;
 
-        pm_token_t opening = not_provided(parser);
-        pm_array_node_t *array = pm_array_node_create(parser, &opening);
-
-        pm_array_node_elements_append(array, value);
-        value = (pm_node_t *) array;
+        pm_array_node_t *array = pm_array_node_create(parser, NULL);
+        pm_array_node_elements_append(parser->arena, array, value);
+        value = UP(array);
 
         while (accept1(parser, PM_TOKEN_COMMA)) {
             pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
 
-            pm_array_node_elements_append(array, element);
-            if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
+            pm_array_node_elements_append(parser->arena, array, element);
+            if (PM_NODE_TYPE_P(element, PM_ERROR_RECOVERY_NODE)) break;
 
             parse_assignment_value_local(parser, element);
         }
     }
 
+    // Assignments whose value is a command call (e.g., a = b c) can only
+    // be followed by modifiers (if/unless/while/until/rescue) and not by
+    // operators with higher binding power. If we find one, emit an error
+    // and skip the operator and its right-hand side.
+    if (single_value && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER && (pm_command_call_value_p(value) || pm_block_call_p(value))) {
+        PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
+        parser_lex(parser);
+        parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+    }
+
     // Contradicting binding powers, the right-hand-side value of the assignment
     // allows the `rescue` modifier.
     if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
@@ -20857,15 +20716,15 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding
         // but without parenthesis.
         if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
             pm_call_node_t *call_node = (pm_call_node_t *) value;
-            if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
+            if ((call_node->arguments != NULL) && (call_node->opening_loc.length == 0)) {
                 accepts_command_call_inner = true;
             }
         }
 
-        pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+        pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (accepts_command_call_inner ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
         context_pop(parser);
 
-        return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
+        return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
     }
 
     return value;
@@ -20882,43 +20741,18 @@ static void
 parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
     if (call_node->arguments != NULL) {
         pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
-        pm_node_destroy(parser, (pm_node_t *) call_node->arguments);
+        pm_node_unreference(parser, UP(call_node->arguments));
         call_node->arguments = NULL;
     }
 
     if (call_node->block != NULL) {
         pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
-        pm_node_destroy(parser, (pm_node_t *) call_node->block);
+        pm_node_unreference(parser, UP(call_node->block));
         call_node->block = NULL;
     }
 }
 
-/**
- * This struct is used to pass information between the regular expression parser
- * and the named capture callback.
- */
-typedef struct {
-    /** The parser that is parsing the regular expression. */
-    pm_parser_t *parser;
-
-    /** The call node wrapping the regular expression node. */
-    pm_call_node_t *call;
-
-    /** The match write node that is being created. */
-    pm_match_write_node_t *match;
-
-    /** The list of names that have been parsed. */
-    pm_constant_id_list_t names;
-
-    /**
-     * Whether the content of the regular expression is shared. This impacts
-     * whether or not we used owned constants or shared constants in the
-     * constant pool for the names of the captures.
-     */
-    bool shared;
-} parse_regular_expression_named_capture_data_t;
-
-static inline const uint8_t *
+static PRISM_INLINE const uint8_t *
 pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
     cursor++;
 
@@ -20939,7 +20773,7 @@ pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const
     return cursor;
 }
 
-static inline const uint8_t *
+static PRISM_INLINE const uint8_t *
 pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
     uint8_t value = (uint8_t) (*cursor - '0');
     cursor++;
@@ -20958,8 +20792,8 @@ pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, con
     return cursor;
 }
 
-static inline const uint8_t *
-pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
+static PRISM_INLINE const uint8_t *
+pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end, const pm_location_t *error_location) {
     const uint8_t *start = cursor - 1;
     cursor++;
 
@@ -20970,7 +20804,7 @@ pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, con
 
     if (*cursor != '{') {
         size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
-        uint32_t value = escape_unicode(parser, cursor, length);
+        uint32_t value = escape_unicode(parser, cursor, length, error_location, 0);
 
         if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
             pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
@@ -20990,7 +20824,10 @@ pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, con
         }
 
         size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
-        uint32_t value = escape_unicode(parser, cursor, length);
+        if (length == 0) {
+            break;
+        }
+        uint32_t value = escape_unicode(parser, cursor, length, error_location, 0);
 
         (void) pm_buffer_append_unicode_codepoint(unescaped, value);
         cursor += length;
@@ -21000,7 +20837,7 @@ pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, con
 }
 
 static void
-pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor) {
+pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor, const pm_location_t *error_location) {
     const uint8_t *end = source + length;
     pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
 
@@ -21018,7 +20855,7 @@ pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8
                 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
                 break;
             case 'u':
-                cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end);
+                cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end, error_location);
                 break;
             default:
                 pm_buffer_append_byte(unescaped, '\\');
@@ -21040,10 +20877,7 @@ pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8
  * capture group.
  */
 static void
-parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
-    parse_regular_expression_named_capture_data_t *callback_data = (parse_regular_expression_named_capture_data_t *) data;
-
-    pm_parser_t *parser = callback_data->parser;
+parse_regular_expression_named_capture(pm_parser_t *parser, const pm_string_t *capture, bool shared, pm_regexp_name_data_t *callback_data) {
     pm_call_node_t *call = callback_data->call;
     pm_constant_id_list_t *names = &callback_data->names;
 
@@ -21061,55 +20895,56 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
     // unescaped, which is what we need.
     const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
     if (PRISM_UNLIKELY(cursor != NULL)) {
-        pm_named_capture_escape(parser, &unescaped, source, length, cursor);
+        pm_named_capture_escape(parser, &unescaped, source, length, cursor, shared ? NULL : &call->receiver->location);
         source = (const uint8_t *) pm_buffer_value(&unescaped);
         length = pm_buffer_length(&unescaped);
     }
 
-    pm_location_t location;
+    const uint8_t *start;
+    const uint8_t *end;
     pm_constant_id_t name;
 
     // If the name of the capture group isn't a valid identifier, we do
     // not add it to the local table.
     if (!pm_slice_is_valid_local(parser, source, source + length)) {
-        pm_buffer_free(&unescaped);
+        pm_buffer_cleanup(&unescaped);
         return;
     }
 
-    if (callback_data->shared) {
+    if (shared) {
         // If the unescaped string is a slice of the source, then we can
         // copy the names directly. The pointers will line up.
-        location = (pm_location_t) { .start = source, .end = source + length };
-        name = pm_parser_constant_id_location(parser, location.start, location.end);
+        start = source;
+        end = source + length;
+        name = pm_parser_constant_id_raw(parser, start, end);
     } else {
         // Otherwise, the name is a slice of the malloc-ed owned string,
         // in which case we need to copy it out into a new string.
-        location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
-
-        void *memory = xmalloc(length);
-        if (memory == NULL) abort();
+        start = parser->start + PM_NODE_START(call->receiver);
+        end = parser->start + PM_NODE_END(call->receiver);
 
+        uint8_t *memory = (uint8_t *) pm_arena_alloc(parser->arena, length, 1);
         memcpy(memory, source, length);
-        name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
+        name = pm_parser_constant_id_owned(parser, memory, length);
     }
 
     // Add this name to the list of constants if it is valid, not duplicated,
     // and not a keyword.
     if (name != 0 && !pm_constant_id_list_includes(names, name)) {
-        pm_constant_id_list_append(names, name);
+        pm_constant_id_list_append(parser->arena, names, name);
 
         int depth;
         if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
             // If the local is not already a local but it is a keyword, then we
             // do not want to add a capture for this.
             if (pm_local_is_keyword((const char *) source, length)) {
-                pm_buffer_free(&unescaped);
+                pm_buffer_cleanup(&unescaped);
                 return;
             }
 
             // If the identifier is not already a local, then we will add it to
             // the local table.
-            pm_parser_local_add(parser, name, location.start, location.end, 0);
+            pm_parser_local_add(parser, name, start, end, 0);
         }
 
         // Here we lazily create the MatchWriteNode since we know we're
@@ -21120,45 +20955,37 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
 
         // Next, create the local variable target and add it to the list of
         // targets for the match.
-        pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
-        pm_node_list_append(&callback_data->match->targets, target);
+        pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &TOK2LOC(parser, &((pm_token_t) { .type = 0, .start = start, .end = end })), name, depth == -1 ? 0 : (uint32_t) depth));
+        pm_node_list_append(parser->arena, &callback_data->match->targets, target);
     }
 
-    pm_buffer_free(&unescaped);
+    pm_buffer_cleanup(&unescaped);
 }
 
 /**
- * Potentially change a =~ with a regular expression with named captures into a
- * match write node.
+ * Potentially change a =~ with an interpolated regular expression with named
+ * captures into a match write node. This is for the interpolated case where
+ * we have concatenated content rather than a regular expression node.
  */
 static pm_node_t *
-parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
-    parse_regular_expression_named_capture_data_t callback_data = {
-        .parser = parser,
+parse_interpolated_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
+    pm_regexp_name_data_t callback_data = {
         .call = call,
+        .match = NULL,
         .names = { 0 },
-        .shared = content->type == PM_STRING_SHARED
     };
 
-    parse_regular_expression_error_data_t error_data = {
-        .parser = parser,
-        .start = call->receiver->location.start,
-        .end = call->receiver->location.end,
-        .shared = content->type == PM_STRING_SHARED
-    };
-
-    pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
-    pm_constant_id_list_free(&callback_data.names);
+    pm_regexp_parse_named_captures(parser, pm_string_source(content), pm_string_length(content), false, extended_mode, parse_regular_expression_named_capture, &callback_data);
 
     if (callback_data.match != NULL) {
-        return (pm_node_t *) callback_data.match;
+        return UP(callback_data.match);
     } else {
-        return (pm_node_t *) call;
+        return UP(call);
     }
 }
 
-static inline pm_node_t *
-parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
+static PRISM_INLINE pm_node_t *
+parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, uint16_t depth) {
     pm_token_t token = parser->current;
 
     switch (token.type) {
@@ -21171,13 +20998,20 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     // is parsed because it could be referenced in the value.
                     pm_call_node_t *call_node = (pm_call_node_t *) node;
                     if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
-                        pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
+                        pm_parser_local_add_location(parser, &call_node->message_loc, 0);
                     }
                 }
                 PRISM_FALLTHROUGH
                 case PM_CASE_WRITABLE: {
+                    // When we have `it = value`, we need to add `it` as a local
+                    // variable before parsing the value, in case the value
+                    // references the variable.
+                    if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
+                        pm_parser_local_add_location(parser, &node->location, 0);
+                    }
+
                     parser_lex(parser);
-                    pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
+                    pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
 
                     if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
                         pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
@@ -21190,8 +21024,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     pm_multi_target_node_targets_append(parser, multi_target, node);
 
                     parser_lex(parser);
-                    pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
-                    return parse_write(parser, (pm_node_t *) multi_target, &token, value);
+                    pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
+                    return parse_write(parser, UP(multi_target), &token, value);
                 }
                 case PM_SOURCE_ENCODING_NODE:
                 case PM_FALSE_NODE:
@@ -21203,7 +21037,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     // In these special cases, we have specific error messages
                     // and we will replace them with local variable writes.
                     parser_lex(parser);
-                    pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
+                    pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
                     return parse_unwriteable_write(parser, node, &token, value);
                 }
                 default:
@@ -21224,71 +21058,65 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                 case PM_GLOBAL_VARIABLE_READ_NODE: {
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
-                    pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_global_variable_and_write_node_create(parser, node, &token, value));
 
-                    pm_node_destroy(parser, node);
                     return result;
                 }
                 case PM_CLASS_VARIABLE_READ_NODE: {
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
-                    pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
 
-                    pm_node_destroy(parser, node);
                     return result;
                 }
                 case PM_CONSTANT_PATH_NODE: {
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
-                    pm_node_t *write = (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                    pm_node_t *write = UP(pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
 
                     return parse_shareable_constant_write(parser, write);
                 }
                 case PM_CONSTANT_READ_NODE: {
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
-                    pm_node_t *write = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                    pm_node_t *write = UP(pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
 
-                    pm_node_destroy(parser, node);
                     return parse_shareable_constant_write(parser, write);
                 }
                 case PM_INSTANCE_VARIABLE_READ_NODE: {
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
-                    pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
 
-                    pm_node_destroy(parser, node);
                     return result;
                 }
                 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
                     pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
-                    pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0));
 
-                    parse_target_implicit_parameter(parser, node);
-                    pm_node_destroy(parser, node);
+                    pm_node_unreference(parser, node);
                     return result;
                 }
                 case PM_LOCAL_VARIABLE_READ_NODE: {
-                    if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
-                        PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
-                        parse_target_implicit_parameter(parser, node);
+                    if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
+                        PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + node->location.start);
+                        pm_node_unreference(parser, node);
                     }
 
                     pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
-                    pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth));
 
-                    pm_node_destroy(parser, node);
                     return result;
                 }
                 case PM_CALL_NODE: {
@@ -21298,16 +21126,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     // receiver that could have been a local variable) then we
                     // will transform it into a local variable write.
                     if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
-                        pm_location_t *message_loc = &cast->message_loc;
-                        pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
-
-                        pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
+                        pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
+                        pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
                         parser_lex(parser);
 
-                        pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
-                        pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
+                        pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                        pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
 
-                        pm_node_destroy(parser, (pm_node_t *) cast);
                         return result;
                     }
 
@@ -21319,8 +21144,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     // this is an aref expression, and we can transform it into
                     // an aset expression.
                     if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
-                        pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
-                        return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
+                        pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                        return UP(pm_index_and_write_node_create(parser, cast, &token, value));
                     }
 
                     // If this node cannot be writable, then we have an error.
@@ -21331,8 +21156,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     }
 
                     parse_call_operator_write(parser, cast, &token);
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
-                    return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                    return UP(pm_call_and_write_node_create(parser, cast, &token, value));
                 }
                 case PM_MULTI_WRITE_NODE: {
                     parser_lex(parser);
@@ -21358,71 +21183,65 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                 case PM_GLOBAL_VARIABLE_READ_NODE: {
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
-                    pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_global_variable_or_write_node_create(parser, node, &token, value));
 
-                    pm_node_destroy(parser, node);
                     return result;
                 }
                 case PM_CLASS_VARIABLE_READ_NODE: {
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
-                    pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
 
-                    pm_node_destroy(parser, node);
                     return result;
                 }
                 case PM_CONSTANT_PATH_NODE: {
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
-                    pm_node_t *write = (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                    pm_node_t *write = UP(pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
 
                     return parse_shareable_constant_write(parser, write);
                 }
                 case PM_CONSTANT_READ_NODE: {
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
-                    pm_node_t *write = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                    pm_node_t *write = UP(pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
 
-                    pm_node_destroy(parser, node);
                     return parse_shareable_constant_write(parser, write);
                 }
                 case PM_INSTANCE_VARIABLE_READ_NODE: {
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
-                    pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
 
-                    pm_node_destroy(parser, node);
                     return result;
                 }
                 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
                     pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
-                    pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0));
 
-                    parse_target_implicit_parameter(parser, node);
-                    pm_node_destroy(parser, node);
+                    pm_node_unreference(parser, node);
                     return result;
                 }
                 case PM_LOCAL_VARIABLE_READ_NODE: {
-                    if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
-                        PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
-                        parse_target_implicit_parameter(parser, node);
+                    if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
+                        PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node));
+                        pm_node_unreference(parser, node);
                     }
 
                     pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
-                    pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth));
 
-                    pm_node_destroy(parser, node);
                     return result;
                 }
                 case PM_CALL_NODE: {
@@ -21432,16 +21251,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     // receiver that could have been a local variable) then we
                     // will transform it into a local variable write.
                     if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
-                        pm_location_t *message_loc = &cast->message_loc;
-                        pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
-
-                        pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
+                        pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
+                        pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
                         parser_lex(parser);
 
-                        pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
-                        pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
+                        pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                        pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
 
-                        pm_node_destroy(parser, (pm_node_t *) cast);
                         return result;
                     }
 
@@ -21453,8 +21269,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     // this is an aref expression, and we can transform it into
                     // an aset expression.
                     if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
-                        pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
-                        return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
+                        pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                        return UP(pm_index_or_write_node_create(parser, cast, &token, value));
                     }
 
                     // If this node cannot be writable, then we have an error.
@@ -21465,8 +21281,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     }
 
                     parse_call_operator_write(parser, cast, &token);
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
-                    return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                    return UP(pm_call_or_write_node_create(parser, cast, &token, value));
                 }
                 case PM_MULTI_WRITE_NODE: {
                     parser_lex(parser);
@@ -21502,71 +21318,65 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                 case PM_GLOBAL_VARIABLE_READ_NODE: {
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
-                    pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_global_variable_operator_write_node_create(parser, node, &token, value));
 
-                    pm_node_destroy(parser, node);
                     return result;
                 }
                 case PM_CLASS_VARIABLE_READ_NODE: {
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
-                    pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
 
-                    pm_node_destroy(parser, node);
                     return result;
                 }
                 case PM_CONSTANT_PATH_NODE: {
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
-                    pm_node_t *write = (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                    pm_node_t *write = UP(pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
 
                     return parse_shareable_constant_write(parser, write);
                 }
                 case PM_CONSTANT_READ_NODE: {
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
-                    pm_node_t *write = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                    pm_node_t *write = UP(pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
 
-                    pm_node_destroy(parser, node);
                     return parse_shareable_constant_write(parser, write);
                 }
                 case PM_INSTANCE_VARIABLE_READ_NODE: {
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
-                    pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
 
-                    pm_node_destroy(parser, node);
                     return result;
                 }
                 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
                     pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
-                    pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0));
 
-                    parse_target_implicit_parameter(parser, node);
-                    pm_node_destroy(parser, node);
+                    pm_node_unreference(parser, node);
                     return result;
                 }
                 case PM_LOCAL_VARIABLE_READ_NODE: {
-                    if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
-                        PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
-                        parse_target_implicit_parameter(parser, node);
+                    if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
+                        PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node));
+                        pm_node_unreference(parser, node);
                     }
 
                     pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
                     parser_lex(parser);
 
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
-                    pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth));
 
-                    pm_node_destroy(parser, node);
                     return result;
                 }
                 case PM_CALL_NODE: {
@@ -21577,14 +21387,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     // receiver that could have been a local variable) then we
                     // will transform it into a local variable write.
                     if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
-                        pm_location_t *message_loc = &cast->message_loc;
-                        pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
+                        pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
+                        pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
+                        pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                        pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
 
-                        pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
-                        pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
-                        pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
-
-                        pm_node_destroy(parser, (pm_node_t *) cast);
                         return result;
                     }
 
@@ -21592,8 +21399,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     // this is an aref expression, and we can transform it into
                     // an aset expression.
                     if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
-                        pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
-                        return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
+                        pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                        return UP(pm_index_operator_write_node_create(parser, cast, &token, value));
                     }
 
                     // If this node cannot be writable, then we have an error.
@@ -21604,8 +21411,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     }
 
                     parse_call_operator_write(parser, cast, &token);
-                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
-                    return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                    return UP(pm_call_operator_write_node_create(parser, cast, &token, value));
                 }
                 case PM_MULTI_WRITE_NODE: {
                     parser_lex(parser);
@@ -21618,7 +21425,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     // In this case we have an operator but we don't know what it's for.
                     // We need to treat it as an error. For now, we'll mark it as an error
                     // and just skip right past it.
-                    PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
+                    PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_str(parser->current.type));
                     return node;
             }
         }
@@ -21626,15 +21433,15 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
         case PM_TOKEN_KEYWORD_AND: {
             parser_lex(parser);
 
-            pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
-            return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
+            pm_node_t *right = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (parser->previous.type == PM_TOKEN_KEYWORD_AND ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+            return UP(pm_and_node_create(parser, node, &token, right));
         }
         case PM_TOKEN_KEYWORD_OR:
         case PM_TOKEN_PIPE_PIPE: {
             parser_lex(parser);
 
-            pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
-            return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
+            pm_node_t *right = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (parser->previous.type == PM_TOKEN_KEYWORD_OR ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+            return UP(pm_or_node_create(parser, node, &token, right));
         }
         case PM_TOKEN_EQUAL_TILDE: {
             // Note that we _must_ parse the value before adding the local
@@ -21645,11 +21452,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
             //
             // In this case, `foo` should be a method call and not a local yet.
             parser_lex(parser);
-            pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+            pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
 
             // By default, we're going to create a call node and then return it.
             pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
-            pm_node_t *result = (pm_node_t *) call;
+            pm_node_t *result = UP(call);
 
             // If the receiver of this =~ is a regular expression node, then we
             // need to introduce local variables for it based on its named
@@ -21690,14 +21497,25 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     pm_string_t owned;
                     pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
 
-                    result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
-                    pm_string_free(&owned);
+                    result = parse_interpolated_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
+                    pm_string_cleanup(&owned);
                 }
             } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
-                // If we have a regular expression node, then we can just parse
-                // the named captures directly off the unescaped string.
-                const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
-                result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
+                // If we have a regular expression node, then we can parse
+                // the named captures and validate encoding in one pass.
+                pm_regular_expression_node_t *regexp = (pm_regular_expression_node_t *) node;
+
+                pm_regexp_name_data_t name_data = {
+                    .call = call,
+                    .match = NULL,
+                    .names = { 0 },
+                };
+
+                pm_node_flag_set(UP(regexp), pm_regexp_parse(parser, regexp, parse_regular_expression_named_capture, &name_data));
+
+                if (name_data.match != NULL) {
+                    result = UP(name_data.match);
+                }
             }
 
             return result;
@@ -21729,21 +21547,21 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                 case PM_RESCUE_MODIFIER_NODE: {
                     pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
                     if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
-                        PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
                     }
                     break;
                 }
                 case PM_AND_NODE: {
                     pm_and_node_t *cast = (pm_and_node_t *) node;
                     if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
-                        PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
                     }
                     break;
                 }
                 case PM_OR_NODE: {
                     pm_or_node_t *cast = (pm_or_node_t *) node;
                     if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
-                        PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
                     }
                     break;
                 }
@@ -21751,20 +21569,20 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     break;
             }
 
-            pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
-            return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
+            pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+            return UP(pm_call_node_binary_create(parser, node, &token, argument, 0));
         }
         case PM_TOKEN_GREATER:
         case PM_TOKEN_GREATER_EQUAL:
         case PM_TOKEN_LESS:
         case PM_TOKEN_LESS_EQUAL: {
             if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
-                PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
+                PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
             }
 
             parser_lex(parser);
-            pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
-            return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON);
+            pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+            return UP(pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON));
         }
         case PM_TOKEN_AMPERSAND_DOT:
         case PM_TOKEN_DOT: {
@@ -21775,28 +21593,28 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
             // This if statement handles the foo.() syntax.
             if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
                 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
-                return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
+                return UP(pm_call_node_shorthand_create(parser, node, &operator, &arguments));
             }
 
             switch (PM_NODE_TYPE(node)) {
                 case PM_RESCUE_MODIFIER_NODE: {
                     pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
                     if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
-                        PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
                     }
                     break;
                 }
                 case PM_AND_NODE: {
                     pm_and_node_t *cast = (pm_and_node_t *) node;
                     if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
-                        PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
                     }
                     break;
                 }
                 case PM_OR_NODE: {
                     pm_or_node_t *cast = (pm_or_node_t *) node;
                     if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
-                        PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
                     }
                     break;
                 }
@@ -21817,23 +21635,23 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     break;
                 }
                 default: {
-                    PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
-                    message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+                    PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_str(parser->current.type));
+                    message = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
                 }
             }
 
-            parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+            parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
             pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
 
             if (
                 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
                 arguments.arguments == NULL &&
-                arguments.opening_loc.start == NULL &&
+                arguments.opening_loc.length == 0 &&
                 match1(parser, PM_TOKEN_COMMA)
             ) {
-                return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+                return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
             } else {
-                return (pm_node_t *) call;
+                return UP(call);
             }
         }
         case PM_TOKEN_DOT_DOT:
@@ -21842,40 +21660,40 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
 
             pm_node_t *right = NULL;
             if (token_begins_expression_p(parser->current.type)) {
-                right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                right = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
             }
 
-            return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
+            return UP(pm_range_node_create(parser, node, &token, right));
         }
         case PM_TOKEN_KEYWORD_IF_MODIFIER: {
             pm_token_t keyword = parser->current;
             parser_lex(parser);
 
-            pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
-            return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
+            pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
+            return UP(pm_if_node_modifier_create(parser, node, &keyword, predicate));
         }
         case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
             pm_token_t keyword = parser->current;
             parser_lex(parser);
 
-            pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
-            return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
+            pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
+            return UP(pm_unless_node_modifier_create(parser, node, &keyword, predicate));
         }
         case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
             parser_lex(parser);
             pm_statements_node_t *statements = pm_statements_node_create(parser);
             pm_statements_node_body_append(parser, statements, node, true);
 
-            pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
-            return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
+            pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
+            return UP(pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
         }
         case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
             parser_lex(parser);
             pm_statements_node_t *statements = pm_statements_node_create(parser);
             pm_statements_node_body_append(parser, statements, node, true);
 
-            pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
-            return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
+            pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
+            return UP(pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
         }
         case PM_TOKEN_QUESTION_MARK: {
             context_push(parser, PM_CONTEXT_TERNARY);
@@ -21885,7 +21703,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
             pm_token_t qmark = parser->current;
             parser_lex(parser);
 
-            pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
+            pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
 
             if (parser->recovering) {
                 // If parsing the true expression of this ternary resulted in a syntax
@@ -21894,27 +21712,23 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                 // before the `expect` function call to make sure it doesn't
                 // accidentally move past a ':' token that occurs after the syntax
                 // error.
-                pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
-                pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
+                pm_token_t colon = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
+                pm_node_t *false_expression = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &colon), PM_TOKEN_LENGTH(&colon)));
 
                 context_pop(parser);
                 pop_block_exits(parser, previous_block_exits);
-                pm_node_list_free(&current_block_exits);
-
-                return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
+                return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
             }
 
             accept1(parser, PM_TOKEN_NEWLINE);
             expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
 
             pm_token_t colon = parser->previous;
-            pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
+            pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
 
             context_pop(parser);
             pop_block_exits(parser, previous_block_exits);
-            pm_node_list_free(&current_block_exits);
-
-            return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
+            return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
         }
         case PM_TOKEN_COLON_COLON: {
             parser_lex(parser);
@@ -21927,7 +21741,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
 
                     if (
                         (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
-                        (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
+                        ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
                     ) {
                         // If we have a constant immediately following a '::' operator, then
                         // this can either be a constant path or a method call, depending on
@@ -21938,11 +21752,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                         pm_token_t message = parser->previous;
                         pm_arguments_t arguments = { 0 };
 
-                        parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
-                        path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
+                        parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
+                        path = UP(pm_call_node_call_create(parser, node, &delimiter, &message, &arguments));
                     } else {
                         // Otherwise, this is a constant path. That would look like Foo::Bar.
-                        path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
+                        path = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
                     }
 
                     // If this is followed by a comma then it is a multiple assignment.
@@ -21962,15 +21776,15 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     // If we have an identifier following a '::' operator, then it is for
                     // sure a method call.
                     pm_arguments_t arguments = { 0 };
-                    parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+                    parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
                     pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
 
                     // If this is followed by a comma then it is a multiple assignment.
                     if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
-                        return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+                        return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
                     }
 
-                    return (pm_node_t *) call;
+                    return UP(call);
                 }
                 case PM_TOKEN_PARENTHESIS_LEFT: {
                     // If we have a parenthesis following a '::' operator, then it is the
@@ -21978,11 +21792,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     pm_arguments_t arguments = { 0 };
                     parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
 
-                    return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
+                    return UP(pm_call_node_shorthand_create(parser, node, &delimiter, &arguments));
                 }
                 default: {
                     expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
-                    return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
+                    return UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
                 }
             }
         }
@@ -21991,31 +21805,31 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
             parser_lex(parser);
             accept1(parser, PM_TOKEN_NEWLINE);
 
-            pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+            pm_node_t *value = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
             context_pop(parser);
 
-            return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
+            return UP(pm_rescue_modifier_node_create(parser, node, &token, value));
         }
         case PM_TOKEN_BRACKET_LEFT: {
             parser_lex(parser);
 
             pm_arguments_t arguments = { 0 };
-            arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+            arguments.opening_loc = TOK2LOC(parser, &parser->previous);
 
             if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
                 pm_accepts_block_stack_push(parser, true);
-                parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
+                parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint8_t) (flags & ~PM_PARSE_ACCEPTS_DO_BLOCK), (uint16_t) (depth + 1));
                 pm_accepts_block_stack_pop(parser);
                 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
             }
 
-            arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+            arguments.closing_loc = TOK2LOC(parser, &parser->previous);
 
             // If we have a comma after the closing bracket then this is a multiple
             // assignment and we should parse the targets.
             if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
                 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
-                return parse_targets_validate(parser, (pm_node_t *) aref, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+                return parse_targets_validate(parser, UP(aref), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
             }
 
             // If we're at the end of the arguments, we can now check if there is a
@@ -22031,17 +21845,17 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
 
             if (block != NULL) {
                 if (arguments.block != NULL) {
-                    pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
+                    pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_AFTER_BLOCK);
                     if (arguments.arguments == NULL) {
                         arguments.arguments = pm_arguments_node_create(parser);
                     }
-                    pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
+                    pm_arguments_node_arguments_append(parser->arena, arguments.arguments, arguments.block);
                 }
 
-                arguments.block = (pm_node_t *) block;
+                arguments.block = UP(block);
             }
 
-            return (pm_node_t *) pm_call_node_aref_create(parser, node, &arguments);
+            return UP(pm_call_node_aref_create(parser, node, &arguments));
         }
         case PM_TOKEN_KEYWORD_IN: {
             bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
@@ -22056,9 +21870,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
             pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
 
             parser->pattern_matching_newlines = previous_pattern_matching_newlines;
-            pm_constant_id_list_free(&captures);
 
-            return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator);
+            return UP(pm_match_predicate_node_create(parser, node, pattern, &operator));
         }
         case PM_TOKEN_EQUAL_GREATER: {
             bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
@@ -22073,9 +21886,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
             pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
 
             parser->pattern_matching_newlines = previous_pattern_matching_newlines;
-            pm_constant_id_list_free(&captures);
 
-            return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator);
+            return UP(pm_match_required_node_create(parser, node, pattern, &operator));
         }
         default:
             assert(false && "unreachable");
@@ -22088,16 +21900,83 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
 #undef PM_PARSE_PATTERN_MULTI
 
 /**
- * Determine if a given call node looks like a "command", which means it has
- * arguments but does not have parentheses.
+ * Some nodes act as statements and limit which operators can follow. This
+ * function inspects the node and the upcoming token to determine whether the
+ * expression loop should stop. It is called both after prefix parsing and after
+ * each infix operator.
+ *
+ * As a side effect, this function also attaches do-blocks to command-style call
+ * nodes when appropriate.
+ *
+ * Returns true if the expression loop should stop (i.e., the next operator
+ * should not be consumed).
  */
-static inline bool
-pm_call_node_command_p(const pm_call_node_t *node) {
-    return (
-        (node->opening_loc.start == NULL) &&
-        (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
-        (node->arguments != NULL || node->block != NULL)
-    );
+static bool
+parse_expression_terminator(pm_parser_t *parser, pm_node_t *node) {
+    pm_binding_power_t left = pm_binding_powers[parser->current.type].left;
+
+    switch (PM_NODE_TYPE(node)) {
+        case PM_MULTI_WRITE_NODE:
+        case PM_RETURN_NODE:
+        case PM_BREAK_NODE:
+        case PM_NEXT_NODE:
+            return left > PM_BINDING_POWER_MODIFIER;
+        case PM_CLASS_VARIABLE_WRITE_NODE:
+        case PM_CONSTANT_PATH_WRITE_NODE:
+        case PM_CONSTANT_WRITE_NODE:
+        case PM_GLOBAL_VARIABLE_WRITE_NODE:
+        case PM_INSTANCE_VARIABLE_WRITE_NODE:
+        case PM_LOCAL_VARIABLE_WRITE_NODE:
+            return PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && left > PM_BINDING_POWER_MODIFIER;
+        case PM_CALL_NODE: {
+            // Calls with an implicit array on the right-hand side are
+            // statements and can only be followed by modifiers.
+            if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY)) {
+                return left > PM_BINDING_POWER_MODIFIER;
+            }
+
+            // Command-style calls (including block commands like
+            // `foo bar do end`) can only be followed by composition
+            // (and/or) and modifier (if/unless/etc.) operators.
+            if (pm_command_call_value_p(node)) {
+                return left > PM_BINDING_POWER_COMPOSITION;
+            }
+
+            // A block call (command with do-block, or any call chained
+            // from one) can only be followed by call chaining (., ::,
+            // &.), composition (and/or), and modifier operators.
+            if (pm_block_call_p(node)) {
+                return left > PM_BINDING_POWER_COMPOSITION && left < PM_BINDING_POWER_CALL;
+            }
+
+            return false;
+        }
+        case PM_SUPER_NODE:
+        case PM_YIELD_NODE:
+            // Command-style super/yield (without parens) can only be followed
+            // by composition and modifier operators.
+            if (pm_command_call_value_p(node)) {
+                return left > PM_BINDING_POWER_COMPOSITION;
+            }
+            return false;
+        case PM_DEF_NODE:
+            // An endless method whose body is a command-style call (e.g.,
+            // `def f = foo bar`) is a command assignment and can only be
+            // followed by modifiers.
+            return left > PM_BINDING_POWER_MODIFIER && pm_command_call_value_p(node);
+        case PM_RESCUE_MODIFIER_NODE:
+            // A rescue modifier whose handler is a pattern match (=> or in)
+            // produces a statement and cannot be followed by operators above
+            // the modifier level.
+            if (left > PM_BINDING_POWER_MODIFIER) {
+                pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
+                pm_node_t *rescue_expression = cast->rescue_expression;
+                return PM_NODE_TYPE_P(rescue_expression, PM_MATCH_REQUIRED_NODE) || PM_NODE_TYPE_P(rescue_expression, PM_MATCH_PREDICATE_NODE);
+            }
+            return false;
+        default:
+            return false;
+    }
 }
 
 /**
@@ -22109,46 +21988,40 @@ pm_call_node_command_p(const pm_call_node_t *node) {
  * determine if they need to perform additional cleanup.
  */
 static pm_node_t *
-parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
+parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
     if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
         pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
-        return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+        return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
     }
 
-    pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
+    pm_node_t *node = parse_expression_prefix(parser, binding_power, flags, diag_id, depth);
 
+    // Some prefix nodes restrict what can follow them: statement-like nodes
+    // allow only modifiers (if/unless/while/until/rescue) or nothing at all, and
+    // command-style calls also allow and/or. Check these before the infix loop.
     switch (PM_NODE_TYPE(node)) {
-        case PM_MISSING_NODE:
-            // If we found a syntax error, then the type of node returned by
-            // parse_expression_prefix is going to be a missing node.
+        case PM_ERROR_RECOVERY_NODE:
             return node;
         case PM_PRE_EXECUTION_NODE:
+            return node;
         case PM_POST_EXECUTION_NODE:
         case PM_ALIAS_GLOBAL_VARIABLE_NODE:
         case PM_ALIAS_METHOD_NODE:
-        case PM_MULTI_WRITE_NODE:
         case PM_UNDEF_NODE:
-            // These expressions are statements, and cannot be followed by
-            // operators (except modifiers).
             if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
                 return node;
             }
             break;
         case PM_CALL_NODE:
-            // If we have a call node, then we need to check if it looks like a
-            // method call without parentheses that contains arguments. If it
-            // does, then it has different rules for parsing infix operators,
-            // namely that it only accepts composition (and/or) and modifiers
-            // (if/unless/etc.).
-            if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
+        case PM_SUPER_NODE:
+        case PM_YIELD_NODE:
+        case PM_DEF_NODE:
+            if (parse_expression_terminator(parser, node)) {
                 return node;
             }
             break;
         case PM_SYMBOL_NODE:
-            // If we have a symbol node that is being parsed as a label, then we
-            // need to immediately return, because there should never be an
-            // infix operator following this node.
-            if (pm_symbol_node_label_p(node)) {
+            if (pm_symbol_node_label_p(parser, node)) {
                 return node;
             }
             break;
@@ -22156,8 +22029,8 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
             break;
     }
 
-    // Otherwise we'll look and see if the next token can be parsed as an infix
-    // operator. If it can, then we'll parse it using parse_expression_infix.
+    // Look and see if the next token can be parsed as an infix operator. If it
+    // can, then we'll parse it using parse_expression_infix.
     pm_binding_powers_t current_binding_powers;
     pm_token_type_t current_token_type;
 
@@ -22167,39 +22040,8 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
         binding_power <= current_binding_powers.left &&
         current_binding_powers.binary
      ) {
-        node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
-
-        switch (PM_NODE_TYPE(node)) {
-            case PM_MULTI_WRITE_NODE:
-                // Multi-write nodes are statements, and cannot be followed by
-                // operators except modifiers.
-                if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
-                    return node;
-                }
-                break;
-            case PM_CLASS_VARIABLE_WRITE_NODE:
-            case PM_CONSTANT_PATH_WRITE_NODE:
-            case PM_CONSTANT_WRITE_NODE:
-            case PM_GLOBAL_VARIABLE_WRITE_NODE:
-            case PM_INSTANCE_VARIABLE_WRITE_NODE:
-            case PM_LOCAL_VARIABLE_WRITE_NODE:
-                // These expressions are statements, by virtue of the right-hand
-                // side of their write being an implicit array.
-                if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
-                    return node;
-                }
-                break;
-            case PM_CALL_NODE:
-                // These expressions are also statements, by virtue of the
-                // right-hand side of the expression (i.e., the last argument to
-                // the call node) being an implicit array.
-                if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
-                    return node;
-                }
-                break;
-            default:
-                break;
-        }
+        node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, flags, (uint16_t) (depth + 1));
+        if (parse_expression_terminator(parser, node)) return node;
 
         // If the operator is nonassoc and we should not be able to parse the
         // upcoming infix operator, break.
@@ -22207,7 +22049,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
             // If this is a non-assoc operator and we are about to parse the
             // exact same operator, then we need to add an error.
             if (match1(parser, current_token_type)) {
-                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_str(parser->current.type), pm_token_str(current_token_type));
                 break;
             }
 
@@ -22220,7 +22062,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
             //
             if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
                 if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
-                    PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
+                    PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_str(parser->current.type), pm_token_str(current_token_type));
                     break;
                 }
 
@@ -22232,7 +22074,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
             }
         }
 
-        if (accepts_command_call) {
+        if (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) {
             // A command-style method call is only accepted on method chains.
             // Thus, we check whether the parsed node can continue method chains.
             // The method chain can continue if the parsed node is one of the following five kinds:
@@ -22247,29 +22089,29 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
                     if (
                         // (1) foo[1]
                         !(
-                            cast->call_operator_loc.start == NULL &&
-                            cast->message_loc.start != NULL &&
-                            cast->message_loc.start[0] == '[' &&
-                            cast->message_loc.end[-1] == ']'
+                            cast->call_operator_loc.length == 0 &&
+                            cast->message_loc.length > 0 &&
+                            parser->start[cast->message_loc.start] == '[' &&
+                            parser->start[cast->message_loc.start + cast->message_loc.length - 1] == ']'
                         ) &&
                         // (2) foo.bar
                         !(
-                            cast->call_operator_loc.start != NULL &&
+                            cast->call_operator_loc.length > 0 &&
                             cast->arguments == NULL &&
                             cast->block == NULL &&
-                            cast->opening_loc.start == NULL
+                            cast->opening_loc.length == 0
                         ) &&
                         // (3) foo.bar(1)
                         !(
-                            cast->call_operator_loc.start != NULL &&
-                            cast->opening_loc.start != NULL
+                            cast->call_operator_loc.length > 0 &&
+                            cast->opening_loc.length > 0
                         ) &&
                         // (4) foo.bar do end
                         !(
                             cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
                         )
                      ) {
-                        accepts_command_call = false;
+                        flags &= (uint8_t) ~PM_PARSE_ACCEPTS_COMMAND_CALL;
                     }
                     break;
                 }
@@ -22277,10 +22119,21 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
                 case PM_CONSTANT_PATH_NODE:
                     break;
                 default:
-                    accepts_command_call = false;
+                    flags &= (uint8_t) ~PM_PARSE_ACCEPTS_COMMAND_CALL;
                     break;
             }
         }
+
+        if (context_terminator(parser->current_context->context, &parser->current)) {
+            pm_binding_powers_t next_binding_powers = pm_binding_powers[parser->current.type];
+            if (
+                !next_binding_powers.binary ||
+                binding_power > next_binding_powers.left ||
+                (PM_NODE_TYPE_P(node, PM_CALL_NODE) && pm_call_node_command_p((pm_call_node_t *) node))
+            ) {
+                return node;
+            }
+        }
     }
 
     return node;
@@ -22299,15 +22152,16 @@ wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
 
         pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
         pm_arguments_node_arguments_append(
+            parser->arena,
             arguments,
-            (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2))
+            UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2)))
         );
 
-        pm_statements_node_body_append(parser, statements, (pm_node_t *) pm_call_node_fcall_synthesized_create(
+        pm_statements_node_body_append(parser, statements, UP(pm_call_node_fcall_synthesized_create(
             parser,
             arguments,
             pm_parser_constant_id_constant(parser, "print", 5)
-        ), true);
+        )), true);
     }
 
     if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
@@ -22318,47 +22172,49 @@ wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
 
             pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
             pm_arguments_node_arguments_append(
+                parser->arena,
                 arguments,
-                (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2))
+                UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2)))
             );
 
             pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
-            pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, (pm_node_t *) receiver, "split", arguments);
+            pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, UP(receiver), "split", arguments);
 
             pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
                 parser,
                 pm_parser_constant_id_constant(parser, "$F", 2),
-                (pm_node_t *) call
+                UP(call)
             );
 
-            pm_statements_node_body_prepend(statements, (pm_node_t *) write);
+            pm_statements_node_body_prepend(parser->arena, statements, UP(write));
         }
 
         pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
         pm_arguments_node_arguments_append(
+            parser->arena,
             arguments,
-            (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2))
+            UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2)))
         );
 
         if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
             pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
-            pm_keyword_hash_node_elements_append(keywords, (pm_node_t *) pm_assoc_node_create(
+            pm_keyword_hash_node_elements_append(parser->arena, keywords, UP(pm_assoc_node_create(
                 parser,
-                (pm_node_t *) pm_symbol_node_synthesized_create(parser, "chomp"),
-                &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
-                (pm_node_t *) pm_true_node_synthesized_create(parser)
-            ));
+                UP(pm_symbol_node_synthesized_create(parser, "chomp")),
+                NULL,
+                UP(pm_true_node_synthesized_create(parser))
+            )));
 
-            pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
-            pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
+            pm_arguments_node_arguments_append(parser->arena, arguments, UP(keywords));
+            pm_node_flag_set(UP(arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
         }
 
         pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
-        pm_statements_node_body_append(parser, wrapped_statements, (pm_node_t *) pm_while_node_synthesized_create(
+        pm_statements_node_body_append(parser, wrapped_statements, UP(pm_while_node_synthesized_create(
             parser,
-            (pm_node_t *) pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4)),
+            UP(pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4))),
             statements
-        ), true);
+        )), true);
 
         statements = wrapped_statements;
     }
@@ -22402,7 +22258,6 @@ parse_program(pm_parser_t *parser) {
         statements = wrap_statements(parser, statements);
     } else {
         flush_block_exits(parser, previous_block_exits);
-        pm_node_list_free(&current_block_exits);
     }
 
     // If this is an empty file, then we're still going to parse all of the
@@ -22410,10 +22265,10 @@ parse_program(pm_parser_t *parser) {
     // correct the location information.
     if (statements == NULL) {
         statements = pm_statements_node_create(parser);
-        pm_statements_node_location_set(statements, parser->start, parser->start);
+        statements->base.location = (pm_location_t) { 0 };
     }
 
-    return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
+    return UP(pm_program_node_create(parser, &locals, statements));
 }
 
 /******************************************************************************/
@@ -22422,8 +22277,8 @@ parse_program(pm_parser_t *parser) {
 
 /**
  * A vendored version of strnstr that is used to find a substring within a
- * string with a given length. This function is used to search for the Ruby
- * engine name within a shebang when the -x option is passed to Ruby.
+ * string with a given length. This function is used to search for "ruby"
+ * within a shebang when the -x option is passed to Ruby.
  *
  * The only modification that we made here is that we don't do NULL byte checks
  * because we know the little parameter will not have a NULL byte and we allow
@@ -22433,7 +22288,7 @@ static const char *
 pm_strnstr(const char *big, const char *little, size_t big_length) {
     size_t little_length = strlen(little);
 
-    for (const char *big_end = big + big_length; big < big_end; big++) {
+    for (const char *big_end = big + big_length; (size_t) (big_end - big) >= little_length; big++) {
         if (*big == *little && memcmp(big, little, little_length) == 0) return big;
     }
 
@@ -22451,7 +22306,7 @@ pm_strnstr(const char *big, const char *little, size_t big_length) {
 static void
 pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
     if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
-        pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
+        pm_parser_warn(parser, U32(start - parser->start), U32(length), PM_WARN_SHEBANG_CARRIAGE_RETURN);
     }
 }
 #endif
@@ -22486,11 +22341,14 @@ pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const c
 /**
  * Initialize a parser with the given start and end pointers.
  */
-PRISM_EXPORTED_FUNCTION void
-pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
+void
+pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
+    assert(arena != NULL);
     assert(source != NULL);
 
     *parser = (pm_parser_t) {
+        .arena = arena,
+        .metadata_arena = { 0 },
         .node_id = 0,
         .lex_state = PM_LEX_STATE_BEG,
         .enclosure_nesting = 0,
@@ -22509,7 +22367,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
         .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
         .next_start = NULL,
         .heredoc_end = NULL,
-        .data_loc = { .start = NULL, .end = NULL },
+        .data_loc = { 0 },
         .comment_list = { 0 },
         .magic_comment_list = { 0 },
         .warning_list = { 0 },
@@ -22519,11 +22377,11 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
         .encoding = PM_ENCODING_UTF_8_ENTRY,
         .encoding_changed_callback = NULL,
         .encoding_comment_start = source,
-        .lex_callback = NULL,
+        .lex_callback = { 0 },
         .filepath = { 0 },
         .constant_pool = { 0 },
-        .newline_list = { 0 },
-        .integer_base = 0,
+        .line_offsets = { 0 },
+        .integer = { 0 },
         .current_string = PM_STRING_EMPTY,
         .start_line = 1,
         .explicit_encoding = NULL,
@@ -22532,6 +22390,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
         .partial_script = false,
         .command_start = true,
         .recovering = false,
+        .continuable = true,
         .encoding_locked = false,
         .encoding_changed = false,
         .pattern_matching_newlines = false,
@@ -22539,32 +22398,30 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
         .current_block_exits = NULL,
         .semantic_token_seen = false,
         .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
-        .current_regular_expression_ascii_only = false,
         .warn_mismatched_indentation = true
     };
 
-    // Initialize the constant pool. We're going to completely guess as to the
-    // number of constants that we'll need based on the size of the input. The
-    // ratio we chose here is actually less arbitrary than you might think.
-    //
-    // We took ~50K Ruby files and measured the size of the file versus the
-    // number of constants that were found in those files. Then we found the
-    // average and standard deviation of the ratios of constants/bytesize. Then
-    // we added 1.34 standard deviations to the average to get a ratio that
-    // would fit 75% of the files (for a two-tailed distribution). This works
-    // because there was about a 0.77 correlation and the distribution was
-    // roughly normal.
-    //
-    // This ratio will need to change if we add more constants to the constant
-    // pool for another node type.
-    uint32_t constant_size = ((uint32_t) size) / 95;
-    pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
-
-    // Initialize the newline list. Similar to the constant pool, we're going to
-    // guess at the number of newlines that we'll need based on the size of the
-    // input.
+    /* Pre-size the arenas based on input size to reduce the number of block
+     * allocations (and the kernel page zeroing they trigger). The ratios were
+     * measured empirically: AST arena ~3.3x input, metadata arena ~1.1x input.
+     * The reserve call is a no-op when the capacity is at or below the default
+     * arena block size, so small inputs don't waste an extra allocation. */
+    if (size <= SIZE_MAX / 4) pm_arena_reserve(arena, size * 4);
+    if (size <= SIZE_MAX / 5 * 4) pm_arena_reserve(&parser->metadata_arena, size + size / 4);
+
+    /* Initialize the constant pool. Measured across 1532 Ruby stdlib files, the
+     * bytes/constant ratio has a median of ~56 and a 90th percentile of ~135.
+     * We use 120 as a balance between over-allocation waste and resize
+     * frequency. Resizes are cheap with arena allocation, so we lean toward
+     * under-estimating. */
+    uint32_t constant_size = ((uint32_t) size) / 120;
+    pm_constant_pool_init(&parser->metadata_arena, &parser->constant_pool, constant_size < 4 ? 4 : constant_size);
+
+    /* Initialize the line offset list. Similar to the constant pool, we are
+     * going to estimate the number of newlines that we will need based on the
+     * size of the input. */
     size_t newline_size = size / 22;
-    pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
+    pm_line_offset_list_init(&parser->metadata_arena, &parser->line_offsets, newline_size < 4 ? 4 : newline_size);
 
     // If options were provided to this parse, establish them here.
     if (options != NULL) {
@@ -22601,7 +22458,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
         if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
 
         for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
-            const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
+            const pm_options_scope_t *scope = pm_options_scope(options, scope_index);
             pm_parser_scope_push(parser, scope_index == 0);
 
             // Scopes given from the outside are not allowed to have numbered
@@ -22609,20 +22466,24 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
             parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
 
             for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
-                const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
+                const pm_string_t *local = pm_options_scope_local(scope, local_index);
 
                 const uint8_t *source = pm_string_source(local);
                 size_t length = pm_string_length(local);
 
-                void *allocated = xmalloc(length);
-                if (allocated == NULL) continue;
-
+                uint8_t *allocated = (uint8_t *) pm_arena_alloc(&parser->metadata_arena, length, 1);
                 memcpy(allocated, source, length);
-                pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
+                pm_parser_local_add_owned(parser, allocated, length);
             }
         }
     }
 
+    // Now that we have established the user-provided options, check if
+    // a version was given and parse as the latest version otherwise.
+    if (parser->version == PM_OPTIONS_VERSION_UNSET) {
+        parser->version = PM_OPTIONS_VERSION_LATEST;
+    }
+
     pm_accepts_block_stack_push(parser, true);
 
     // Skip past the UTF-8 BOM if it exists.
@@ -22656,8 +22517,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
     // If the shebang does not include "ruby" and this is the main script being
     // parsed, then we will start searching the file for a shebang that does
     // contain "ruby" as if -x were passed on the command line.
-    const uint8_t *newline = next_newline(parser->start, parser->end - parser->start);
-    size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->start);
+    const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
+    size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->current.end);
 
     if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
         const char *engine;
@@ -22676,7 +22537,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
             }
 
             search_shebang = false;
-        } else if (options->main_script && !parser->parsing_eval) {
+        } else if (options != NULL && options->main_script && !parser->parsing_eval) {
             search_shebang = true;
         }
     }
@@ -22697,7 +22558,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
         const uint8_t *newline = next_newline(cursor, parser->end - cursor);
 
         while (newline != NULL) {
-            pm_newline_list_append(&parser->newline_list, newline);
+            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
 
             cursor = newline + 1;
             newline = next_newline(cursor, parser->end - cursor);
@@ -22726,8 +22587,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
             parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
             parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
         } else {
-            pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
-            pm_newline_list_clear(&parser->newline_list);
+            pm_parser_err(parser, 0, 0, PM_ERR_SCRIPT_NOT_FOUND);
+            pm_line_offset_list_clear(&parser->line_offsets);
         }
     }
 
@@ -22738,56 +22599,28 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
 }
 
 /**
- * Register a callback that will be called whenever prism changes the encoding
- * it is using to parse based on the magic comment.
- */
-PRISM_EXPORTED_FUNCTION void
-pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback) {
-    parser->encoding_changed_callback = callback;
-}
-
-/**
- * Free all of the memory associated with the comment list.
- */
-static inline void
-pm_comment_list_free(pm_list_t *list) {
-    pm_list_node_t *node, *next;
-
-    for (node = list->head; node != NULL; node = next) {
-        next = node->next;
-
-        pm_comment_t *comment = (pm_comment_t *) node;
-        xfree(comment);
-    }
-}
-
-/**
- * Free all of the memory associated with the magic comment list.
+ * Allocate and initialize a parser for the given source buffer and size.
+ *
+ * The resulting parser must eventually be freed with `pm_parser_free()`. The
+ * arena is caller-owned and must outlive the parser — `pm_parser_cleanup()`
+ * does not free the arena.
  */
-static inline void
-pm_magic_comment_list_free(pm_list_t *list) {
-    pm_list_node_t *node, *next;
-
-    for (node = list->head; node != NULL; node = next) {
-        next = node->next;
+pm_parser_t *
+pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options) {
+    pm_parser_t *parser = (pm_parser_t *) xmalloc(sizeof(pm_parser_t));
+    if (parser == NULL) abort();
 
-        pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) node;
-        xfree(magic_comment);
-    }
+    pm_parser_init(arena, parser, source, size, options);
+    return parser;
 }
 
 /**
  * Free any memory associated with the given parser.
  */
-PRISM_EXPORTED_FUNCTION void
-pm_parser_free(pm_parser_t *parser) {
-    pm_string_free(&parser->filepath);
-    pm_diagnostic_list_free(&parser->error_list);
-    pm_diagnostic_list_free(&parser->warning_list);
-    pm_comment_list_free(&parser->comment_list);
-    pm_magic_comment_list_free(&parser->magic_comment_list);
-    pm_constant_pool_free(&parser->constant_pool);
-    pm_newline_list_free(&parser->newline_list);
+void
+pm_parser_cleanup(pm_parser_t *parser) {
+    pm_string_cleanup(&parser->filepath);
+    pm_arena_cleanup(&parser->metadata_arena);
 
     while (parser->current_scope != NULL) {
         // Normally, popping the scope doesn't free the locals since it is
@@ -22803,145 +22636,224 @@ pm_parser_free(pm_parser_t *parser) {
 }
 
 /**
- * Parse the Ruby source associated with the given parser and return the tree.
+ * Free both the memory held by the given parser and the parser itself.
  */
-PRISM_EXPORTED_FUNCTION pm_node_t *
-pm_parse(pm_parser_t *parser) {
-    return parse_program(parser);
+void
+pm_parser_free(pm_parser_t *parser) {
+    pm_parser_cleanup(parser);
+    xfree_sized(parser, sizeof(pm_parser_t));
 }
 
 /**
- * Read into the stream until the gets callback returns false. If the last read
- * line from the stream matches an __END__ marker, then halt and return false,
- * otherwise return true.
+ * Returns true if the given diagnostic ID represents an error that cannot be
+ * fixed by appending more input. These are errors where the existing source
+ * contains definitively invalid syntax (as opposed to merely incomplete input).
  */
 static bool
-pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets) {
-#define LINE_SIZE 4096
-    char line[LINE_SIZE];
-
-    while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
-        size_t length = LINE_SIZE;
-        while (length > 0 && line[length - 1] == '\n') length--;
-
-        if (length == LINE_SIZE) {
-            // If we read a line that is the maximum size and it doesn't end
-            // with a newline, then we'll just append it to the buffer and
-            // continue reading.
-            length--;
-            pm_buffer_append_string(buffer, line, length);
-            continue;
-        }
-
-        // Append the line to the buffer.
-        length--;
-        pm_buffer_append_string(buffer, line, length);
-
-        // Check if the line matches the __END__ marker. If it does, then stop
-        // reading and return false. In most circumstances, this means we should
-        // stop reading from the stream so that the DATA constant can pick it
-        // up.
-        switch (length) {
-            case 7:
-                if (strncmp(line, "__END__", 7) == 0) return false;
-                break;
-            case 8:
-                if (strncmp(line, "__END__\n", 8) == 0) return false;
-                break;
-            case 9:
-                if (strncmp(line, "__END__\r\n", 9) == 0) return false;
-                break;
-        }
+pm_parse_err_is_fatal(pm_diagnostic_id_t diag_id) {
+    switch (diag_id) {
+        case PM_ERR_ARRAY_EXPRESSION_AFTER_STAR:
+        case PM_ERR_BEGIN_UPCASE_BRACE:
+        case PM_ERR_CLASS_VARIABLE_BARE:
+        case PM_ERR_END_UPCASE_BRACE:
+        case PM_ERR_ESCAPE_INVALID_HEXADECIMAL:
+        case PM_ERR_ESCAPE_INVALID_UNICODE_LIST:
+        case PM_ERR_ESCAPE_INVALID_UNICODE_SHORT:
+        case PM_ERR_EXPRESSION_NOT_WRITABLE:
+        case PM_ERR_EXPRESSION_NOT_WRITABLE_SELF:
+        case PM_ERR_FLOAT_PARSE:
+        case PM_ERR_GLOBAL_VARIABLE_BARE:
+        case PM_ERR_HASH_KEY:
+        case PM_ERR_HEREDOC_IDENTIFIER:
+        case PM_ERR_INSTANCE_VARIABLE_BARE:
+        case PM_ERR_INVALID_BLOCK_EXIT:
+        case PM_ERR_INVALID_ENCODING_MAGIC_COMMENT:
+        case PM_ERR_INVALID_FLOAT_EXPONENT:
+        case PM_ERR_INVALID_NUMBER_BINARY:
+        case PM_ERR_INVALID_NUMBER_DECIMAL:
+        case PM_ERR_INVALID_NUMBER_HEXADECIMAL:
+        case PM_ERR_INVALID_NUMBER_OCTAL:
+        case PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING:
+        case PM_ERR_NO_LOCAL_VARIABLE:
+        case PM_ERR_PARAMETER_ORDER:
+        case PM_ERR_STATEMENT_UNDEF:
+        case PM_ERR_VOID_EXPRESSION:
+            return true;
+        default:
+            return false;
     }
-
-    return true;
-#undef LINE_SIZE
 }
 
 /**
- * Determine if there was an unterminated heredoc at the end of the input, which
- * would mean the stream isn't finished and we should keep reading.
+ * Determine whether the source parsed by the given parser could become valid if
+ * more input were appended. This is used by tools like IRB to decide whether to
+ * prompt for continuation or to display an error.
+ *
+ * The parser starts with continuable=true. This function scans all errors to
+ * detect two categories of non-continuable errors:
+ *
+ * 1. Fatal errors: errors like invalid number literals or bare global variables
+ *    that indicate definitively invalid syntax. These are only considered fatal
+ *    if they occur before EOF (at EOF they could be from truncated input, e.g.
+ *    `"\x` is an incomplete hex escape).
  *
- * For the other lex modes we can check if the lex mode has been closed, but for
- * heredocs when we hit EOF we close the lex mode and then go back to parse the
- * rest of the line after the heredoc declaration so that we get more of the
- * syntax tree.
+ * 2. Stray tokens: unexpected_token_ignore and unexpected_token_close_context
+ *    errors indicate tokens that don't belong. A stray token is a cascade
+ *    effect (and does not prevent continuability) if:
+ *
+ *    a. A non-stray, non-fatal error appeared earlier in the error list at a
+ *       strictly earlier source position (the stray was caused by a preceding
+ *       parse failure, e.g. a truncated heredoc), OR
+ *    b. The stray token is at EOF, starts after position 0 (there is valid
+ *       code before it), and either is a single byte (likely a truncated
+ *       token like `\`) or there are non-stray errors elsewhere.
+ *
+ *    Closing delimiters (`)`, `]`, `}`) at EOF are always genuinely stray —
+ *    they are complete tokens and cannot become part of a longer valid
+ *    construct by appending more input.
+ *
+ *    c. The stray token is `=` at the start of a line, which could be the
+ *       beginning of `=begin` (an embedded document). The remaining bytes
+ *       after `=` may parse as an identifier, so the error is not at EOF,
+ *       but the construct is genuinely incomplete.
  */
-static bool
-pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
-    pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
+static void
+pm_parse_continuable(pm_parser_t *parser) {
+    // If there are no errors then there is nothing to continue.
+    if (parser->error_list.size == 0) {
+        parser->continuable = false;
+        return;
+    }
 
-    for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
-        if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
-            return true;
+    if (!parser->continuable) return;
+
+    size_t source_length = (size_t) (parser->end - parser->start);
+
+    // First pass: check if there are any non-stray, non-fatal errors.
+    bool has_non_stray_error = false;
+    for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
+        if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE && error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT && !pm_parse_err_is_fatal(error->diag_id)) {
+            has_non_stray_error = true;
+            break;
         }
     }
 
-    return false;
-}
+    // Second pass: check each error. We track the minimum source position
+    // among non-stray errors seen so far in list order (fatal errors count
+    // only when at EOF), which lets us detect cascade stray tokens.
+    size_t non_stray_min_start = SIZE_MAX;
 
-/**
- * Parse a stream of Ruby source and return the tree.
- *
- * Prism is designed around having the entire source in memory at once, but you
- * can stream stdin in to Ruby so we need to support a streaming API.
- */
-PRISM_EXPORTED_FUNCTION pm_node_t *
-pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options) {
-    pm_buffer_init(buffer);
+    for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
+        size_t error_start = (size_t) error->location.start;
+        size_t error_end = error_start + (size_t) error->location.length;
+        bool at_eof = error_end >= source_length;
 
-    bool eof = pm_parse_stream_read(buffer, stream, stream_fgets);
-    pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
-    pm_node_t *node = pm_parse(parser);
+        // Fatal errors are non-continuable unless they occur at EOF.
+        if (pm_parse_err_is_fatal(error->diag_id) && !at_eof) {
+            parser->continuable = false;
+            return;
+        }
 
-    while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
-        pm_node_destroy(parser, node);
-        eof = pm_parse_stream_read(buffer, stream, stream_fgets);
+        // Track non-stray error positions (incl. fatal ones at EOF) in list order.
+        if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE &&
+            error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT) {
+            if (error_start < non_stray_min_start) non_stray_min_start = error_start;
+            continue;
+        }
 
-        pm_parser_free(parser);
-        pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
-        node = pm_parse(parser);
+        // This is a stray token. Determine if it is a cascade effect
+        // of a preceding error or genuinely stray.
+
+        // Rule (a): a non-stray error was seen earlier in the list at a
+        // strictly earlier position — this stray is a cascade effect.
+        if (non_stray_min_start < error_start) continue;
+
+        // Rule (b): this stray is at EOF with valid code before it.
+        // Single-byte stray tokens at EOF (like `\` for line continuation)
+        // are likely truncated tokens. Multi-byte stray tokens (like the
+        // keyword `end`) need additional evidence that they are cascade
+        // effects (i.e. non-stray errors exist elsewhere).
+        if (at_eof && error_start > 0) {
+            // Exception: closing delimiters at EOF are genuinely stray.
+            if (error->location.length == 1) {
+                const uint8_t *byte = parser->start + error_start;
+                if (*byte == ')' || *byte == ']' || *byte == '}') {
+                    parser->continuable = false;
+                    return;
+                }
+
+                // Single-byte non-delimiter stray at EOF: cascade.
+                continue;
+            }
+
+            // Multi-byte stray at EOF: cascade only if there are
+            // non-stray errors (evidence of a preceding parse failure).
+            if (has_non_stray_error) continue;
+        }
+
+        // Rule (c): a stray `=` at the start of a line could be the
+        // beginning of an embedded document (`=begin`). The remaining
+        // bytes after `=` parse as an identifier, so the error is not
+        // at EOF, but the construct is genuinely incomplete.
+        if (error->location.length == 1) {
+            const uint8_t *byte = parser->start + error_start;
+            if (*byte == '=' && (error_start == 0 || *(byte - 1) == '\n')) continue;
+        }
+
+        // This stray token is genuinely non-continuable.
+        parser->continuable = false;
+        return;
     }
+}
 
+/**
+ * Parse the Ruby source associated with the given parser and return the tree.
+ */
+pm_node_t *
+pm_parse(pm_parser_t *parser) {
+    pm_node_t *node = parse_program(parser);
+    pm_parse_continuable(parser);
     return node;
 }
 
 /**
- * Parse the source and return true if it parses without errors or warnings.
+ * Parse a stream of Ruby source and return the tree.
+ *
+ * Prism is designed around having the entire source in memory at once, but you
+ * can stream stdin into Ruby, so we need to support a streaming API.
  */
-PRISM_EXPORTED_FUNCTION bool
-pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
-    pm_options_t options = { 0 };
-    pm_options_read(&options, data);
+pm_node_t *
+pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_source_t *source, const pm_options_t *options) {
+    bool eof = pm_source_stream_read(source);
 
-    pm_parser_t parser;
-    pm_parser_init(&parser, source, size, &options);
+    pm_parser_t *tmp = pm_parser_new(arena, pm_source_source(source), pm_source_length(source), options);
+    pm_node_t *node = pm_parse(tmp);
 
-    pm_node_t *node = pm_parse(&parser);
-    pm_node_destroy(&parser, node);
+    while (!eof && tmp->error_list.size > 0) {
+        eof = pm_source_stream_read(source);
 
-    bool result = parser.error_list.size == 0;
-    pm_parser_free(&parser);
-    pm_options_free(&options);
+        pm_parser_free(tmp);
+        pm_arena_cleanup(arena);
 
-    return result;
+        tmp = pm_parser_new(arena, pm_source_source(source), pm_source_length(source), options);
+        node = pm_parse(tmp);
+    }
+
+    *parser = tmp;
+    return node;
 }
 
 #undef PM_CASE_KEYWORD
 #undef PM_CASE_OPERATOR
 #undef PM_CASE_WRITABLE
 #undef PM_STRING_EMPTY
-#undef PM_LOCATION_NODE_BASE_VALUE
-#undef PM_LOCATION_NODE_VALUE
-#undef PM_LOCATION_NULL_VALUE
-#undef PM_LOCATION_TOKEN_VALUE
 
 // We optionally support serializing to a binary string. For systems that don't
 // want or need this functionality, it can be turned off with the
 // PRISM_EXCLUDE_SERIALIZATION define.
 #ifndef PRISM_EXCLUDE_SERIALIZATION
 
-static inline void
+static PRISM_INLINE void
 pm_serialize_header(pm_buffer_t *buffer) {
     pm_buffer_append_string(buffer, "PRISM", 5);
     pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
@@ -22953,7 +22865,7 @@ pm_serialize_header(pm_buffer_t *buffer) {
 /**
  * Serialize the AST represented by the given node to the given buffer.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
     pm_serialize_header(buffer);
     pm_serialize_content(parser, node, buffer);
@@ -22964,13 +22876,14 @@ pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
  * Parse and serialize the AST represented by the given source to the given
  * buffer.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
     pm_options_t options = { 0 };
     pm_options_read(&options, data);
 
+    pm_arena_t arena = { 0 };
     pm_parser_t parser;
-    pm_parser_init(&parser, source, size, &options);
+    pm_parser_init(&arena, &parser, source, size, &options);
 
     pm_node_t *node = pm_parse(&parser);
 
@@ -22978,216 +22891,53 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons
     pm_serialize_content(&parser, node, buffer);
     pm_buffer_append_byte(buffer, '\0');
 
-    pm_node_destroy(&parser, node);
-    pm_parser_free(&parser);
-    pm_options_free(&options);
+    pm_parser_cleanup(&parser);
+    pm_arena_cleanup(&arena);
+    pm_options_cleanup(&options);
 }
 
 /**
  * Parse and serialize the AST represented by the source that is read out of the
  * given stream into to the given buffer.
  */
-PRISM_EXPORTED_FUNCTION void
-pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data) {
-    pm_parser_t parser;
+void
+pm_serialize_parse_stream(pm_buffer_t *buffer, pm_source_t *source, const char *data) {
+    pm_arena_t arena = { 0 };
+    pm_parser_t *parser;
     pm_options_t options = { 0 };
     pm_options_read(&options, data);
 
-    pm_buffer_t parser_buffer;
-    pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, &options);
+    pm_node_t *node = pm_parse_stream(&parser, &arena, source, &options);
     pm_serialize_header(buffer);
-    pm_serialize_content(&parser, node, buffer);
+    pm_serialize_content(parser, node, buffer);
     pm_buffer_append_byte(buffer, '\0');
 
-    pm_node_destroy(&parser, node);
-    pm_buffer_free(&parser_buffer);
-    pm_parser_free(&parser);
-    pm_options_free(&options);
+    pm_parser_free(parser);
+    pm_arena_cleanup(&arena);
+    pm_options_cleanup(&options);
 }
 
 /**
  * Parse and serialize the comments in the given source to the given buffer.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
     pm_options_t options = { 0 };
     pm_options_read(&options, data);
 
+    pm_arena_t arena = { 0 };
     pm_parser_t parser;
-    pm_parser_init(&parser, source, size, &options);
+    pm_parser_init(&arena, &parser, source, size, &options);
 
-    pm_node_t *node = pm_parse(&parser);
+    pm_parse(&parser);
     pm_serialize_header(buffer);
     pm_serialize_encoding(parser.encoding, buffer);
     pm_buffer_append_varsint(buffer, parser.start_line);
-    pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
+    pm_serialize_comment_list(&parser.comment_list, buffer);
 
-    pm_node_destroy(&parser, node);
-    pm_parser_free(&parser);
-    pm_options_free(&options);
+    pm_parser_cleanup(&parser);
+    pm_arena_cleanup(&arena);
+    pm_options_cleanup(&options);
 }
 
 #endif
-
-/******************************************************************************/
-/* Slice queries for the Ruby API                                             */
-/******************************************************************************/
-
-/** The category of slice returned from pm_slice_type. */
-typedef enum {
-    /** Returned when the given encoding name is invalid. */
-    PM_SLICE_TYPE_ERROR = -1,
-
-    /** Returned when no other types apply to the slice. */
-    PM_SLICE_TYPE_NONE,
-
-    /** Returned when the slice is a valid local variable name. */
-    PM_SLICE_TYPE_LOCAL,
-
-    /** Returned when the slice is a valid constant name. */
-    PM_SLICE_TYPE_CONSTANT,
-
-    /** Returned when the slice is a valid method name. */
-    PM_SLICE_TYPE_METHOD_NAME
-} pm_slice_type_t;
-
-/**
- * Check that the slice is a valid local variable name or constant.
- */
-pm_slice_type_t
-pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
-    // first, get the right encoding object
-    const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
-    if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
-
-    // check that there is at least one character
-    if (length == 0) return PM_SLICE_TYPE_NONE;
-
-    size_t width;
-    if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
-        // valid because alphabetical
-    } else if (*source == '_') {
-        // valid because underscore
-        width = 1;
-    } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
-        // valid because multibyte
-    } else {
-        // invalid because no match
-        return PM_SLICE_TYPE_NONE;
-    }
-
-    // determine the type of the slice based on the first character
-    const uint8_t *end = source + length;
-    pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
-
-    // next, iterate through all of the bytes of the string to ensure that they
-    // are all valid identifier characters
-    source += width;
-
-    while (source < end) {
-        if ((width = encoding->alnum_char(source, end - source)) != 0) {
-            // valid because alphanumeric
-            source += width;
-        } else if (*source == '_') {
-            // valid because underscore
-            source++;
-        } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
-            // valid because multibyte
-            source += width;
-        } else {
-            // invalid because no match
-            break;
-        }
-    }
-
-    // accept a ! or ? at the end of the slice as a method name
-    if (*source == '!' || *source == '?' || *source == '=') {
-        source++;
-        result = PM_SLICE_TYPE_METHOD_NAME;
-    }
-
-    // valid if we are at the end of the slice
-    return source == end ? result : PM_SLICE_TYPE_NONE;
-}
-
-/**
- * Check that the slice is a valid local variable name.
- */
-PRISM_EXPORTED_FUNCTION pm_string_query_t
-pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
-    switch (pm_slice_type(source, length, encoding_name)) {
-        case PM_SLICE_TYPE_ERROR:
-            return PM_STRING_QUERY_ERROR;
-        case PM_SLICE_TYPE_NONE:
-        case PM_SLICE_TYPE_CONSTANT:
-        case PM_SLICE_TYPE_METHOD_NAME:
-            return PM_STRING_QUERY_FALSE;
-        case PM_SLICE_TYPE_LOCAL:
-            return PM_STRING_QUERY_TRUE;
-    }
-
-    assert(false && "unreachable");
-    return PM_STRING_QUERY_FALSE;
-}
-
-/**
- * Check that the slice is a valid constant name.
- */
-PRISM_EXPORTED_FUNCTION pm_string_query_t
-pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
-    switch (pm_slice_type(source, length, encoding_name)) {
-        case PM_SLICE_TYPE_ERROR:
-            return PM_STRING_QUERY_ERROR;
-        case PM_SLICE_TYPE_NONE:
-        case PM_SLICE_TYPE_LOCAL:
-        case PM_SLICE_TYPE_METHOD_NAME:
-            return PM_STRING_QUERY_FALSE;
-        case PM_SLICE_TYPE_CONSTANT:
-            return PM_STRING_QUERY_TRUE;
-    }
-
-    assert(false && "unreachable");
-    return PM_STRING_QUERY_FALSE;
-}
-
-/**
- * Check that the slice is a valid method name.
- */
-PRISM_EXPORTED_FUNCTION pm_string_query_t
-pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
-#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
-#define C1(c) (*source == c)
-#define C2(s) (memcmp(source, s, 2) == 0)
-#define C3(s) (memcmp(source, s, 3) == 0)
-
-    switch (pm_slice_type(source, length, encoding_name)) {
-        case PM_SLICE_TYPE_ERROR:
-            return PM_STRING_QUERY_ERROR;
-        case PM_SLICE_TYPE_NONE:
-            break;
-        case PM_SLICE_TYPE_LOCAL:
-            // numbered parameters are not valid method names
-            return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
-        case PM_SLICE_TYPE_CONSTANT:
-            // all constants are valid method names
-        case PM_SLICE_TYPE_METHOD_NAME:
-            // all method names are valid method names
-            return PM_STRING_QUERY_TRUE;
-    }
-
-    switch (length) {
-        case 1:
-            return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
-        case 2:
-            return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
-        case 3:
-            return B(C3("===") || C3("<=>") || C3("[]="));
-        default:
-            return PM_STRING_QUERY_FALSE;
-    }
-
-#undef B
-#undef C1
-#undef C2
-#undef C3
-}
diff --git a/prism/prism.h b/prism/prism.h
index 317568aa0c..b342bb32c6 100644
--- a/prism/prism.h
+++ b/prism/prism.h
@@ -6,281 +6,25 @@
 #ifndef PRISM_H
 #define PRISM_H
 
-#include "prism/defines.h"
-#include "prism/util/pm_buffer.h"
-#include "prism/util/pm_char.h"
-#include "prism/util/pm_integer.h"
-#include "prism/util/pm_memchr.h"
-#include "prism/util/pm_strncasecmp.h"
-#include "prism/util/pm_strpbrk.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "prism/arena.h"
 #include "prism/ast.h"
+#include "prism/buffer.h"
 #include "prism/diagnostic.h"
+#include "prism/json.h"
 #include "prism/node.h"
 #include "prism/options.h"
-#include "prism/pack.h"
 #include "prism/parser.h"
 #include "prism/prettyprint.h"
-#include "prism/regexp.h"
-#include "prism/static_literals.h"
+#include "prism/serialize.h"
+#include "prism/source.h"
+#include "prism/stream.h"
+#include "prism/string_query.h"
 #include "prism/version.h"
 
-#include <assert.h>
-#include <errno.h>
-#include <locale.h>
-#include <math.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifndef _WIN32
-#include <strings.h>
-#endif
-
-/**
- * The prism version and the serialization format.
- *
- * @returns The prism version as a constant string.
- */
-PRISM_EXPORTED_FUNCTION const char * pm_version(void);
-
-/**
- * Initialize a parser with the given start and end pointers.
- *
- * @param parser The parser to initialize.
- * @param source The source to parse.
- * @param size The size of the source.
- * @param options The optional options to use when parsing.
- */
-PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options);
-
-/**
- * Register a callback that will be called whenever prism changes the encoding
- * it is using to parse based on the magic comment.
- *
- * @param parser The parser to register the callback with.
- * @param callback The callback to register.
- */
-PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback);
-
-/**
- * Free any memory associated with the given parser.
- *
- * @param parser The parser to free.
- */
-PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser);
-
-/**
- * Initiate the parser with the given parser.
- *
- * @param parser The parser to use.
- * @return The AST representing the source.
- */
-PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);
-
-/**
- * This function is used in pm_parse_stream to retrieve a line of input from a
- * stream. It closely mirrors that of fgets so that fgets can be used as the
- * default implementation.
- */
-typedef char * (pm_parse_stream_fgets_t)(char *string, int size, void *stream);
-
-/**
- * Parse a stream of Ruby source and return the tree.
- *
- * @param parser The parser to use.
- * @param buffer The buffer to use.
- * @param stream The stream to parse.
- * @param stream_fgets The function to use to read from the stream.
- * @param options The optional options to use when parsing.
- * @return The AST representing the source.
- */
-PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options);
-
-// We optionally support serializing to a binary string. For systems that don't
-// want or need this functionality, it can be turned off with the
-// PRISM_EXCLUDE_SERIALIZATION define.
-#ifndef PRISM_EXCLUDE_SERIALIZATION
-
-/**
- * Parse and serialize the AST represented by the source that is read out of the
- * given stream into to the given buffer.
- *
- * @param buffer The buffer to serialize to.
- * @param stream The stream to parse.
- * @param stream_fgets The function to use to read from the stream.
- * @param data The optional data to pass to the parser.
- */
-PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data);
-
-/**
- * Serialize the given list of comments to the given buffer.
- *
- * @param parser The parser to serialize.
- * @param list The list of comments to serialize.
- * @param buffer The buffer to serialize to.
- */
-void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer);
-
-/**
- * Serialize the name of the encoding to the buffer.
- *
- * @param encoding The encoding to serialize.
- * @param buffer The buffer to serialize to.
- */
-void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer);
-
-/**
- * Serialize the encoding, metadata, nodes, and constant pool.
- *
- * @param parser The parser to serialize.
- * @param node The node to serialize.
- * @param buffer The buffer to serialize to.
- */
-void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
-
-/**
- * Serialize the AST represented by the given node to the given buffer.
- *
- * @param parser The parser to serialize.
- * @param node The node to serialize.
- * @param buffer The buffer to serialize to.
- */
-PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
-
-/**
- * Parse the given source to the AST and dump the AST to the given buffer.
- *
- * @param buffer The buffer to serialize to.
- * @param source The source to parse.
- * @param size The size of the source.
- * @param data The optional data to pass to the parser.
- */
-PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);
-
-/**
- * Parse and serialize the comments in the given source to the given buffer.
- *
- * @param buffer The buffer to serialize to.
- * @param source The source to parse.
- * @param size The size of the source.
- * @param data The optional data to pass to the parser.
- */
-PRISM_EXPORTED_FUNCTION void pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);
-
-/**
- * Lex the given source and serialize to the given buffer.
- *
- * @param source The source to lex.
- * @param size The size of the source.
- * @param buffer The buffer to serialize to.
- * @param data The optional data to pass to the lexer.
- */
-PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);
-
-/**
- * Parse and serialize both the AST and the tokens represented by the given
- * source to the given buffer.
- *
- * @param buffer The buffer to serialize to.
- * @param source The source to parse.
- * @param size The size of the source.
- * @param data The optional data to pass to the parser.
- */
-PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);
-
-#endif
-
-/**
- * Parse the source and return true if it parses without errors or warnings.
- *
- * @param source The source to parse.
- * @param size The size of the source.
- * @param data The optional data to pass to the parser.
- * @return True if the source parses without errors or warnings.
- */
-PRISM_EXPORTED_FUNCTION bool pm_parse_success_p(const uint8_t *source, size_t size, const char *data);
-
-/**
- * Returns a string representation of the given token type.
- *
- * @param token_type The token type to convert to a string.
- * @return A string representation of the given token type.
- */
-PRISM_EXPORTED_FUNCTION const char * pm_token_type_name(pm_token_type_t token_type);
-
-/**
- * Returns the human name of the given token type.
- *
- * @param token_type The token type to convert to a human name.
- * @return The human name of the given token type.
- */
-const char * pm_token_type_human(pm_token_type_t token_type);
-
-// We optionally support dumping to JSON. For systems that don't want or need
-// this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define.
-#ifndef PRISM_EXCLUDE_JSON
-
-/**
- * Dump JSON to the given buffer.
- *
- * @param buffer The buffer to serialize to.
- * @param parser The parser that parsed the node.
- * @param node The node to serialize.
- */
-PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node);
-
-#endif
-
-/**
- * Represents the results of a slice query.
- */
-typedef enum {
-    /** Returned if the encoding given to a slice query was invalid. */
-    PM_STRING_QUERY_ERROR = -1,
-
-    /** Returned if the result of the slice query is false. */
-    PM_STRING_QUERY_FALSE,
-
-    /** Returned if the result of the slice query is true. */
-    PM_STRING_QUERY_TRUE
-} pm_string_query_t;
-
-/**
- * Check that the slice is a valid local variable name.
- *
- * @param source The source to check.
- * @param length The length of the source.
- * @param encoding_name The name of the encoding of the source.
- * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
- *   the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
- */
-PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name);
-
-/**
- * Check that the slice is a valid constant name.
- *
- * @param source The source to check.
- * @param length The length of the source.
- * @param encoding_name The name of the encoding of the source.
- * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
- *   the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
- */
-PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name);
-
-/**
- * Check that the slice is a valid method name.
- *
- * @param source The source to check.
- * @param length The length of the source.
- * @param encoding_name The name of the encoding of the source.
- * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
- *   the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
- */
-PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name);
-
 /**
  * @mainpage
  *
@@ -289,7 +33,7 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint
  * dependencies. It is currently being integrated into
  * [CRuby](https://github.com/ruby/ruby),
  * [JRuby](https://github.com/jruby/jruby),
- * [TruffleRuby](https://github.com/oracle/truffleruby),
+ * [TruffleRuby](https://github.com/truffleruby/truffleruby),
  * [Sorbet](https://github.com/sorbet/sorbet), and
  * [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree).
  *
@@ -303,32 +47,32 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint
  *
  * @section parsing Parsing
  *
- * In order to parse Ruby code, the structures and functions that you're going
- * to want to use and be aware of are:
+ * In order to parse Ruby code, the functions that you are going to want to use
+ * and be aware of are:
  *
- * * `pm_parser_t` - the main parser structure
- * * `pm_parser_init` - initialize a parser
- * * `pm_parse` - parse and return the root node
- * * `pm_node_destroy` - deallocate the root node returned by `pm_parse`
- * * `pm_parser_free` - free the internal memory of the parser
+ * * `pm_arena_new()` - create a new arena to hold all AST-lifetime allocations
+ * * `pm_parser_new()` - allocate and initialize a new parser
+ * * `pm_parse()` - parse and return the root node
+ * * `pm_parser_free()` - free the parser and its internal memory
+ * * `pm_arena_free()` - free all AST-lifetime memory
  *
  * Putting all of this together would look something like:
  *
  * ```c
  * void parse(const uint8_t *source, size_t length) {
- *     pm_parser_t parser;
- *     pm_parser_init(&parser, source, length, NULL);
+ *     pm_arena_t *arena = pm_arena_new();
+ *     pm_parser_t *parser = pm_parser_new(arena, source, length, NULL);
  *
- *     pm_node_t *root = pm_parse(&parser);
+ *     pm_node_t *root = pm_parse(parser);
  *     printf("PARSED!\n");
  *
- *     pm_node_destroy(&parser, root);
- *     pm_parser_free(&parser);
+ *     pm_parser_free(parser);
+ *     pm_arena_free(arena);
  * }
  * ```
  *
- * All of the nodes "inherit" from `pm_node_t` by embedding those structures as
- * their first member. This means you can downcast and upcast any node in the
+ * All of the nodes "inherit" from `pm_node_t` by embedding those structures
+ * as their first member. This means you can downcast and upcast any node in the
  * tree to a `pm_node_t`.
  *
  * @section serializing Serializing
@@ -336,48 +80,51 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint
  * Prism provides the ability to serialize the AST and its related metadata into
  * a binary format. This format is designed to be portable to different
  * languages and runtimes so that you only need to make one FFI call in order to
- * parse Ruby code. The structures and functions that you're going to want to
- * use and be aware of are:
+ * parse Ruby code. The functions that you are going to want to use and be
+ * aware of are:
  *
- * * `pm_buffer_t` - a small buffer object that will hold the serialized AST
- * * `pm_buffer_free` - free the memory associated with the buffer
- * * `pm_serialize` - serialize the AST into a buffer
- * * `pm_serialize_parse` - parse and serialize the AST into a buffer
+ * * `pm_buffer_new()` - create a new buffer
+ * * `pm_buffer_free()` - free the buffer and its internal memory
+ * * `pm_serialize_parse()` - parse and serialize the AST into a buffer
  *
  * Putting all of this together would look something like:
  *
  * ```c
  * void serialize(const uint8_t *source, size_t length) {
- *     pm_buffer_t buffer = { 0 };
+ *     pm_buffer_t *buffer = pm_buffer_new();
  *
- *     pm_serialize_parse(&buffer, source, length, NULL);
+ *     pm_serialize_parse(buffer, source, length, NULL);
  *     printf("SERIALIZED!\n");
  *
- *     pm_buffer_free(&buffer);
+ *     pm_buffer_free(buffer);
  * }
  * ```
  *
  * @section inspecting Inspecting
  *
  * Prism provides the ability to inspect the AST by pretty-printing nodes. You
- * can do this with the `pm_prettyprint` function, which you would use like:
+ * can do this with the `pm_prettyprint()` function, which you would use like:
  *
  * ```c
  * void prettyprint(const uint8_t *source, size_t length) {
- *     pm_parser_t parser;
- *     pm_parser_init(&parser, source, length, NULL);
+ *     pm_arena_t *arena = pm_arena_new();
+ *     pm_parser_t *parser = pm_parser_new(arena, source, length, NULL);
  *
- *     pm_node_t *root = pm_parse(&parser);
- *     pm_buffer_t buffer = { 0 };
+ *     pm_node_t *root = pm_parse(parser);
+ *     pm_buffer_t *buffer = pm_buffer_new();
  *
- *     pm_prettyprint(&buffer, &parser, root);
- *     printf("%*.s\n", (int) buffer.length, buffer.value);
+ *     pm_prettyprint(buffer, parser, root);
+ *     printf("%.*s\n", (int) pm_buffer_length(buffer), pm_buffer_value(buffer));
  *
- *     pm_buffer_free(&buffer);
- *     pm_node_destroy(&parser, root);
- *     pm_parser_free(&parser);
+ *     pm_buffer_free(buffer);
+ *     pm_parser_free(parser);
+ *     pm_arena_free(arena);
  * }
  * ```
  */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif
diff --git a/prism/regexp.c b/prism/regexp.c
index dcc7476244..cc17aa4d09 100644
--- a/prism/regexp.c
+++ b/prism/regexp.c
@@ -1,5 +1,20 @@
-#include "prism/regexp.h"
-
+#include "prism/internal/regexp.h"
+
+#include "prism/compiler/inline.h"
+#include "prism/compiler/fallthrough.h"
+#include "prism/internal/buffer.h"
+#include "prism/internal/char.h"
+#include "prism/internal/diagnostic.h"
+#include "prism/internal/encoding.h"
+#include "prism/internal/memchr.h"
+#include "prism/internal/parser.h"
+#include "prism/internal/stringy.h"
+#include "prism/internal/strncasecmp.h"
+
+#include <assert.h>
+#include <string.h>
+
+/** The maximum depth of nested groups allowed in a regular expression. */
 #define PM_REGEXP_PARSE_DEPTH_MAX 4096
 
 /**
@@ -18,6 +33,54 @@ typedef struct {
     /** A pointer to the end of the source that we are parsing. */
     const uint8_t *end;
 
+    /** The encoding of the source. */
+    const pm_encoding_t *encoding;
+
+    /** The callback to call when a named capture group is found. */
+    pm_regexp_name_callback_t name_callback;
+
+    /** The data to pass to the name callback. */
+    pm_regexp_name_data_t *name_data;
+
+    /** The start of the regexp node (for error locations). */
+    const uint8_t *node_start;
+
+    /** The end of the regexp node (for error locations). */
+    const uint8_t *node_end;
+
+    /**
+     * The explicit encoding determined by escape sequences. NULL if no
+     * encoding-setting escape has been seen, UTF-8 for `\u` escapes, or the
+     * source encoding for `\x` escapes.
+     */
+    const pm_encoding_t *explicit_encoding;
+
+    /**
+     * Pointer to the first non-POSIX property name (for /n error messages).
+     * POSIX properties (Alnum, Alpha, etc.) work in all encodings.
+     * Script properties (Hiragana, Katakana, etc.) work in /e, /s, /u.
+     * Unicode-only properties (L, Ll, etc.) work only in /u.
+     */
+    const uint8_t *property_name;
+
+    /** Length of the first non-POSIX property name found. */
+    size_t property_name_length;
+
+    /**
+     * Pointer to the first Unicode-only property name (for /e, /s error
+     * messages). NULL if only POSIX or script properties have been seen.
+     */
+    const uint8_t *unicode_property_name;
+
+    /** Length of the first Unicode-only property name found. */
+    size_t unicode_property_name_length;
+
+    /** Buffer of hex escape byte values >= 0x80, separated by 0x00 sentinels. */
+    pm_buffer_t hex_escape_buffer;
+
+    /** Count of non-ASCII literal bytes (not from escapes). */
+    uint32_t non_ascii_literal_count;
+
     /**
      * Whether or not the regular expression currently being parsed is in
      * extended mode, wherein whitespace is ignored and comments are allowed.
@@ -27,31 +90,77 @@ typedef struct {
     /** Whether the encoding has changed from the default. */
     bool encoding_changed;
 
-    /** The encoding of the source. */
-    const pm_encoding_t *encoding;
+    /** Whether the source content is shared (for named capture callback). */
+    bool shared;
 
-    /** The callback to call when a named capture group is found. */
-    pm_regexp_name_callback_t name_callback;
+    /** Whether a `\u` escape (`\uNNNN` or `\u{...}`) with value >= 0x80 was seen. */
+    bool has_unicode_escape;
 
-    /** The data to pass to the name callback. */
-    void *name_data;
+    /** Whether a `\xNN` escape (or `\M-x`, etc.) with value >= 0x80 was seen. */
+    bool has_hex_escape;
+
+    /**
+     * Tracks whether the last encoding-setting escape was `\u` (true) or `\x`
+     * (false). This matters for error messages when both types are mixed.
+     */
+    bool last_escape_was_unicode;
+
+    /** Whether any `\p{...}` or `\P{...}` property escape was found. */
+    bool has_property_escape;
+
+    /** Whether a Unicode-only property escape was found (not POSIX or script). */
+    bool has_unicode_property_escape;
 
-    /** The callback to call when a parse error is found. */
-    pm_regexp_error_callback_t error_callback;
+    /** Whether a `\u` escape with invalid range (surrogate or > 0x10FFFF) was seen. */
+    bool invalid_unicode_range;
 
-    /** The data to pass to the error callback. */
-    void *error_data;
+    /** Whether we are accumulating consecutive hex escape bytes. */
+    bool hex_group_active;
+
+    /** Whether an invalid multibyte character was found during parsing. */
+    bool has_invalid_multibyte;
 } pm_regexp_parser_t;
 
 /**
- * Append an error to the parser.
+ * Append a syntax error to the parser's error list. If the source is shared
+ * (points into the original source), we can point to the exact error location.
+ * Otherwise, we point to the whole regexp node.
  */
-static inline void
+static PRISM_INLINE void
 pm_regexp_parse_error(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, const char *message) {
-    parser->error_callback(start, end, message, parser->error_data);
+    pm_parser_t *pm = parser->parser;
+    uint32_t loc_start, loc_length; // offset from pm->start, and length, of the reported span
+
+    if (parser->shared) { // start/end point into the original source: report them exactly
+        loc_start = (uint32_t) (start - pm->start);
+        loc_length = (uint32_t) (end - start);
+    } else { // start/end are not source pointers: report the whole regexp node instead
+        loc_start = (uint32_t) (parser->node_start - pm->start);
+        loc_length = (uint32_t) (parser->node_end - parser->node_start);
+    }
+
+    pm_diagnostic_list_append_format(&pm->metadata_arena, &pm->error_list, loc_start, loc_length, PM_ERR_REGEXP_PARSE_ERROR, message);
 }
 
 /**
+ * Append a formatted diagnostic error with proper shared/non-shared location
+ * handling. This is a macro because we need variadic args for the format string.
+ */
+#define pm_regexp_parse_error_format(parser_, err_start_, err_end_, diag_id, ...) \
+    do { /* parser_ and err_start_ expand more than once: pass side-effect-free expressions */ \
+        pm_parser_t *pm__ = (parser_)->parser; \
+        uint32_t loc_start__, loc_length__; \
+        if ((parser_)->shared) { /* exact span within the source */ \
+            loc_start__ = (uint32_t) ((err_start_) - pm__->start); \
+            loc_length__ = (uint32_t) ((err_end_) - (err_start_)); \
+        } else { /* span of the whole regexp node */ \
+            loc_start__ = (uint32_t) ((parser_)->node_start - pm__->start); \
+            loc_length__ = (uint32_t) ((parser_)->node_end - (parser_)->node_start); \
+        } \
+        pm_diagnostic_list_append_format(&pm__->metadata_arena, &pm__->error_list, loc_start__, loc_length__, diag_id, __VA_ARGS__); \
+    } while (0)
+
+/**
  * This appends a new string to the list of named captures. This function
  * assumes the caller has already checked the validity of the name callback.
  */
@@ -59,14 +168,14 @@ static void
 pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
     pm_string_t string;
     pm_string_shared_init(&string, start, end);
-    parser->name_callback(&string, parser->name_data);
-    pm_string_free(&string);
+    parser->name_callback(parser->parser, &string, parser->shared, parser->name_data);
+    pm_string_cleanup(&string);
 }
 
 /**
  * Returns true if the next character is the end of the source.
  */
-static inline bool
+static PRISM_INLINE bool
 pm_regexp_char_is_eof(pm_regexp_parser_t *parser) {
     return parser->cursor >= parser->end;
 }
@@ -74,7 +183,7 @@ pm_regexp_char_is_eof(pm_regexp_parser_t *parser) {
 /**
  * Optionally accept a char and consume it if it exists.
  */
-static inline bool
+static PRISM_INLINE bool
 pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
     if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
         parser->cursor++;
@@ -86,7 +195,7 @@ pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
 /**
  * Expect a character to be present and consume it.
  */
-static inline bool
+static PRISM_INLINE bool
 pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
     if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
         parser->cursor++;
@@ -114,6 +223,47 @@ pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
 }
 
 /**
+ * Mark a group boundary in the hex escape byte buffer. When consecutive hex
+ * escape bytes >= 0x80 are followed by a non-hex-escape, this appends a 0x00
+ * sentinel to separate the groups for later multibyte validation.
+ */
+static PRISM_INLINE void
+pm_regexp_hex_group_boundary(pm_regexp_parser_t *parser) {
+    // Only an open run of hex escape bytes needs a terminating sentinel.
+    if (!parser->hex_group_active) return;
+    pm_buffer_append_byte(&parser->hex_escape_buffer, 0x00);
+    parser->hex_group_active = false;
+}
+
+/**
+ * Track a hex escape byte value >= 0x80 for multibyte validation.
+ */
+static PRISM_INLINE void
+pm_regexp_track_hex_escape(pm_regexp_parser_t *parser, uint8_t byte) {
+    if (byte < 0x80) {
+        // A low byte only closes the current run of high escape bytes.
+        pm_regexp_hex_group_boundary(parser);
+        return;
+    }
+    pm_buffer_append_byte(&parser->hex_escape_buffer, byte);
+    parser->hex_group_active = true;
+    parser->has_hex_escape = true;
+    parser->explicit_encoding = parser->encoding;
+    parser->last_escape_was_unicode = false;
+}
+
+/**
+ * Parse a hex digit character and return its value, or -1 if not a hex digit.
+ */
+static PRISM_INLINE int
+pm_regexp_hex_digit_value(uint8_t byte) {
+    if ('0' <= byte && byte <= '9') return byte - '0';
+    if ('a' <= byte && byte <= 'f') return 10 + (byte - 'a');
+    if ('A' <= byte && byte <= 'F') return 10 + (byte - 'A');
+    return -1; // not a hexadecimal digit
+}
+
+/**
  * Range quantifiers are a special class of quantifiers that look like
  *
  * * {digit}
@@ -121,13 +271,12 @@ pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
  * * {digit,digit}
  * * {,digit}
  *
- * Unfortunately, if there are any spaces in between, then this just becomes a
- * regular character match expression and we have to backtrack. So when this
- * function first starts running, we'll create a "save" point and then attempt
- * to parse the quantifier. If it fails, we'll restore the save point and
- * return.
+ * If there are any spaces in between, then this just becomes a regular
+ * character match expression and we have to backtrack. So when this function
+ * first starts running, we'll create a "save" point and then attempt to parse
+ * the quantifier. If it fails, we'll restore the save point and return.
  *
- * The properly track everything, we're going to build a little state machine.
+ * To properly track everything, we're going to build a little state machine.
  * It looks something like the following:
  *
  *                  +-------+                 +---------+ ------------+
@@ -275,11 +424,393 @@ pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
     );
 }
 
+/**
+ * Property escape classification. Onigmo supports three tiers of property
+ * names depending on the encoding:
+ *
+ * - POSIX properties (Alnum, Alpha, ASCII, Blank, Cntrl, Digit, Graph, Lower,
+ *   Print, Punct, Space, Upper, XDigit, Word): valid in all encodings.
+ * - Script properties (Hiragana, Katakana, Han, Latin, Greek, Cyrillic): valid
+ *   in EUC-JP (/e), Windows-31J (/s), and UTF-8 (/u), but not ASCII-8BIT (/n).
+ * - Unicode-only properties (general categories like L, Ll, Lu, etc., plus
+ *   Any, Assigned): valid only in UTF-8 (/u).
+ */
+typedef enum { /* Declaration order is significant: the parser compares tiers with >=. */
+    PM_REGEXP_PROPERTY_POSIX,  /* valid in all encodings */
+    PM_REGEXP_PROPERTY_SCRIPT, /* valid in /e, /s, and /u, but not /n */
+    PM_REGEXP_PROPERTY_UNICODE /* valid only in /u */
+} pm_regexp_property_type_t;
+
+/**
+ * Classify a property name. The name may start with '^' for negation, which
+ * is skipped before matching. Names not listed below are Unicode-only.
+ */
+static pm_regexp_property_type_t
+pm_regexp_classify_property(const uint8_t *name, size_t length) {
+    // Skip leading '^' for negated properties like \p{^Hiragana}.
+    if (length > 0 && name[0] == '^') {
+        name++;
+        length--;
+    }
+
+#define PM_REGEXP_CASECMP(str_) (pm_strncasecmp(name, (const uint8_t *) (str_), length) == 0)
+    // Dispatch on length first so each comparison below covers the whole name.
+    switch (length) {
+        case 3:
+            if (PM_REGEXP_CASECMP("Han")) return PM_REGEXP_PROPERTY_SCRIPT;
+            break;
+        case 4:
+            if (PM_REGEXP_CASECMP("Word")) return PM_REGEXP_PROPERTY_POSIX;
+            break;
+        case 5:
+            /* Most properties are length 5, so dispatch on first character (| 0x20 folds ASCII letters to lowercase). */
+            switch (name[0] | 0x20) {
+                case 'a':
+                    if (PM_REGEXP_CASECMP("Alnum")) return PM_REGEXP_PROPERTY_POSIX;
+                    if (PM_REGEXP_CASECMP("Alpha")) return PM_REGEXP_PROPERTY_POSIX;
+                    if (PM_REGEXP_CASECMP("ASCII")) return PM_REGEXP_PROPERTY_POSIX;
+                    break;
+                case 'b':
+                    if (PM_REGEXP_CASECMP("Blank")) return PM_REGEXP_PROPERTY_POSIX;
+                    break;
+                case 'c':
+                    if (PM_REGEXP_CASECMP("Cntrl")) return PM_REGEXP_PROPERTY_POSIX;
+                    break;
+                case 'd':
+                    if (PM_REGEXP_CASECMP("Digit")) return PM_REGEXP_PROPERTY_POSIX;
+                    break;
+                case 'g':
+                    if (PM_REGEXP_CASECMP("Graph")) return PM_REGEXP_PROPERTY_POSIX;
+                    if (PM_REGEXP_CASECMP("Greek")) return PM_REGEXP_PROPERTY_SCRIPT;
+                    break;
+                case 'l':
+                    if (PM_REGEXP_CASECMP("Lower")) return PM_REGEXP_PROPERTY_POSIX;
+                    if (PM_REGEXP_CASECMP("Latin")) return PM_REGEXP_PROPERTY_SCRIPT;
+                    break;
+                case 'p':
+                    if (PM_REGEXP_CASECMP("Print")) return PM_REGEXP_PROPERTY_POSIX;
+                    if (PM_REGEXP_CASECMP("Punct")) return PM_REGEXP_PROPERTY_POSIX;
+                    break;
+                case 's':
+                    if (PM_REGEXP_CASECMP("Space")) return PM_REGEXP_PROPERTY_POSIX;
+                    break;
+                case 'u':
+                    if (PM_REGEXP_CASECMP("Upper")) return PM_REGEXP_PROPERTY_POSIX;
+                    break;
+            }
+            break;
+        case 6:
+            if (PM_REGEXP_CASECMP("XDigit")) return PM_REGEXP_PROPERTY_POSIX;
+            break;
+        case 8:
+            if (PM_REGEXP_CASECMP("Hiragana")) return PM_REGEXP_PROPERTY_SCRIPT;
+            if (PM_REGEXP_CASECMP("Katakana")) return PM_REGEXP_PROPERTY_SCRIPT;
+            if (PM_REGEXP_CASECMP("Cyrillic")) return PM_REGEXP_PROPERTY_SCRIPT;
+            break;
+    }
+
+#undef PM_REGEXP_CASECMP
+
+    // Everything else is Unicode-only (general categories, other scripts, etc.).
+    return PM_REGEXP_PROPERTY_UNICODE;
+}
+
+/**
+ * Check for and skip a `\p{...}` or `\P{...}` Unicode property escape. The
+ * cursor should be pointing at 'p' or 'P' when this is called. If a property
+ * escape is found, record it on the regexp parser, advance past the closing
+ * '}', and return true. Otherwise skip only the 'p'/'P' and return false.
+ *
+ * Properties are classified into three tiers (POSIX, script, Unicode-only) to
+ * determine which encoding modifiers they are valid with.
+ */
+static bool
+pm_regexp_parse_property_escape(pm_regexp_parser_t *parser) {
+    assert(*parser->cursor == 'p' || *parser->cursor == 'P');
+
+    if (parser->cursor + 1 < parser->end && parser->cursor[1] == '{') {
+        const uint8_t *name_start = parser->cursor + 2; // first byte after '{' (a leading '^' stays part of the name)
+        const uint8_t *search = name_start;
+
+        while (search < parser->end && *search != '}') search++;
+
+        if (search < parser->end) { // found the closing '}'
+            size_t name_length = (size_t) (search - name_start);
+            parser->has_property_escape = true;
+
+            pm_regexp_property_type_t type = pm_regexp_classify_property(name_start, name_length);
+
+            // Track the first non-POSIX property name (for /n error messages).
+            if (type >= PM_REGEXP_PROPERTY_SCRIPT && parser->property_name == NULL) {
+                parser->property_name = name_start;
+                parser->property_name_length = name_length;
+            }
+
+            // Track the first Unicode-only property name (for /e, /s error messages).
+            if (type == PM_REGEXP_PROPERTY_UNICODE) {
+                parser->has_unicode_property_escape = true;
+                if (parser->unicode_property_name == NULL) {
+                    parser->unicode_property_name = name_start;
+                    parser->unicode_property_name_length = name_length;
+                }
+            }
+
+            parser->cursor = search + 1; // skip past '}'
+            return true;
+        }
+    }
+
+    // Not a property escape (no '{' or no closing '}'), just skip the single character after '\'.
+    parser->cursor++;
+    return false;
+}
+
+/**
+ * Validate and skip a \u escape sequence in a regular expression. The cursor
+ * should be pointing at the character after 'u' when this is called. Handles
+ * both the \u{NNNN MMMM} and \uNNNN forms, records errors on the parser, and
+ * tracks encoding state; over-long \u{...} values saturate instead of wrapping.
+ */
+static void
+pm_regexp_parse_unicode_escape(pm_regexp_parser_t *parser) {
+    const uint8_t *escape_start = parser->cursor - 2; // points to '\'
+
+    if (pm_regexp_char_is_eof(parser)) {
+        pm_regexp_parse_error(parser, escape_start, parser->cursor, "invalid Unicode escape");
+        return;
+    }
+
+    if (*parser->cursor == '{') {
+        parser->cursor++; // skip '{'
+
+        // Skip leading whitespace.
+        while (!pm_regexp_char_is_eof(parser) && pm_char_is_whitespace(*parser->cursor)) {
+            parser->cursor++;
+        }
+
+        bool has_codepoint = false;
+
+        while (!pm_regexp_char_is_eof(parser) && *parser->cursor != '}') {
+            // Accumulate the hex digits; stop shifting once past 0x10FFFF so uint32_t never wraps.
+            uint32_t value = 0;
+            size_t hex_count = 0;
+
+            int digit;
+            while (!pm_regexp_char_is_eof(parser) && (digit = pm_regexp_hex_digit_value(*parser->cursor)) >= 0) {
+                if (value <= 0x10FFFF) value = (value << 4) | (uint32_t) digit;
+                hex_count++;
+                parser->cursor++;
+            }
+
+            if (hex_count == 0) {
+                // Skip to '}' or end of regexp to find the full extent.
+                while (!pm_regexp_char_is_eof(parser) && *parser->cursor != '}') {
+                    parser->cursor++;
+                }
+
+                const uint8_t *escape_end = parser->cursor;
+                if (!pm_regexp_char_is_eof(parser)) {
+                    escape_end++;
+                    parser->cursor++; // skip '}'
+                }
+
+                pm_regexp_parse_error_format(parser, escape_start, escape_end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (escape_end - escape_start), (const char *) escape_start);
+                return;
+            }
+
+            if (hex_count > 6) {
+                pm_regexp_parse_error(parser, escape_start, parser->cursor, "invalid Unicode range");
+            }
+
+            // Track encoding state for this codepoint.
+            if (value >= 0x80) {
+                parser->has_unicode_escape = true;
+                parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY;
+                parser->last_escape_was_unicode = true;
+                pm_regexp_hex_group_boundary(parser);
+            }
+
+            // Check for invalid Unicode range (surrogates or > 0x10FFFF).
+            if (value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF)) {
+                parser->invalid_unicode_range = true;
+            }
+
+            has_codepoint = true;
+
+            // Skip whitespace between codepoints.
+            while (!pm_regexp_char_is_eof(parser) && pm_char_is_whitespace(*parser->cursor)) {
+                parser->cursor++;
+            }
+        }
+
+        if (pm_regexp_char_is_eof(parser)) {
+            pm_regexp_parse_error(parser, escape_start, parser->cursor, "unterminated Unicode escape");
+        } else {
+            if (!has_codepoint) {
+                pm_regexp_parse_error_format(parser, escape_start, parser->cursor + 1, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->cursor + 1 - escape_start), (const char *) escape_start);
+            }
+            parser->cursor++; // skip '}'
+        }
+    } else {
+        // \uNNNN form — need exactly 4 hex digits.
+        uint32_t value = 0;
+        size_t hex_count = 0;
+
+        int digit;
+        while (hex_count < 4 && !pm_regexp_char_is_eof(parser) && (digit = pm_regexp_hex_digit_value(*parser->cursor)) >= 0) {
+            value = (value << 4) | (uint32_t) digit;
+            hex_count++;
+            parser->cursor++;
+        }
+
+        if (hex_count < 4) {
+            pm_regexp_parse_error(parser, escape_start, parser->cursor, "invalid Unicode escape");
+        } else if (value >= 0x80) {
+            parser->has_unicode_escape = true;
+            parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY;
+            parser->last_escape_was_unicode = true;
+            pm_regexp_hex_group_boundary(parser);
+        }
+
+        // Check for invalid Unicode range.
+        if (hex_count == 4 && (value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF))) {
+            parser->invalid_unicode_range = true;
+        }
+    }
+}
+
 // Forward declaration because character sets can be nested.
 static bool
 pm_regexp_parse_lbracket(pm_regexp_parser_t *parser, uint16_t depth);
 
 /**
+ * Parse a \x escape and return the byte value. The cursor should be pointing
+ * at the character after 'x'. Returns -1 if no hex digits follow.
+ */
+static int
+pm_regexp_parse_hex_escape(pm_regexp_parser_t *parser) {
+    // No hex digit after 'x': nothing is consumed and nothing is tracked.
+    if (pm_regexp_char_is_eof(parser)) return -1;
+
+    int high = pm_regexp_hex_digit_value(*parser->cursor);
+    if (high < 0) return -1;
+
+    int byte = high;
+    parser->cursor++;
+
+    // The second digit is optional. When present, the first digit becomes the
+    // high nibble and the second the low nibble.
+    if (!pm_regexp_char_is_eof(parser)) {
+        int low = pm_regexp_hex_digit_value(*parser->cursor);
+        if (low >= 0) {
+            byte = (high << 4) | low;
+            parser->cursor++;
+        }
+    }
+
+    // Feed the byte into the encoding tracker before handing it back.
+    pm_regexp_track_hex_escape(parser, (uint8_t) byte);
+
+    return byte;
+}
+
+/**
+ * Parse the escape whose backslash has already been consumed (the cursor is on
+ * the character after it): \u (unicode), \p/\P (property), \x (hex), \M-, \C-,
+ * \c (meta/control, tracked for encoding state), or any other single character.
+ */
+static void
+pm_regexp_parse_backslash_escape(pm_regexp_parser_t *parser) {
+    if (pm_regexp_char_is_eof(parser)) return;
+
+    switch (*parser->cursor) {
+        case 'u':
+            parser->cursor++; // skip 'u'
+            pm_regexp_parse_unicode_escape(parser);
+            break;
+        case 'p':
+        case 'P':
+            pm_regexp_parse_property_escape(parser);
+            break;
+        case 'x':
+            parser->cursor++; // skip 'x'
+            pm_regexp_parse_hex_escape(parser);
+            break;
+        case 'M':
+            // \M-x produces (x | 0x80), always >= 0x80
+            if (parser->cursor + 2 < parser->end && parser->cursor[1] == '-') { // "M-" plus at least one more byte
+                parser->cursor += 2; // skip 'M-'
+                if (!pm_regexp_char_is_eof(parser)) {
+                    if (*parser->cursor == '\\') {
+                        parser->cursor++;
+                        // \M-\C-x or \M-\cx — the resulting byte is always >= 0x80
+                        // We just need to track it as a hex escape >= 0x80.
+                        pm_regexp_parse_backslash_escape(parser);
+                    } else {
+                        parser->cursor++;
+                    }
+                    // \M-x always produces a byte >= 0x80; 0x80 is recorded as a stand-in for the actual byte
+                    pm_regexp_track_hex_escape(parser, 0x80);
+                }
+            } else {
+                parser->cursor++;
+            }
+            break;
+        case 'C':
+            // \C-x produces (x & 0x1F)
+            if (parser->cursor + 2 < parser->end && parser->cursor[1] == '-') { // "C-" plus at least one more byte
+                parser->cursor += 2; // skip 'C-'
+                if (!pm_regexp_char_is_eof(parser)) {
+                    if (*parser->cursor == '\\') {
+                        parser->cursor++;
+                        pm_regexp_parse_backslash_escape(parser);
+                    } else {
+                        parser->cursor++;
+                    }
+                }
+            } else {
+                parser->cursor++;
+            }
+            break;
+        case 'c':
+            // \cx produces (x & 0x1F)
+            parser->cursor++; // skip 'c'
+            if (!pm_regexp_char_is_eof(parser)) {
+                if (*parser->cursor == '\\') {
+                    parser->cursor++;
+                    pm_regexp_parse_backslash_escape(parser);
+                } else {
+                    parser->cursor++;
+                }
+            }
+            break;
+        default:
+            pm_regexp_hex_group_boundary(parser); // any other escape ends a run of hex escape bytes
+            parser->cursor++;
+            break;
+    }
+}
+
+/**
+ * Inspect the literal byte at `cursor`. When it is non-ASCII and the regexp
+ * uses a changed, multibyte encoding, either step the parser over the rest of
+ * a well-formed character or flag and report an invalid multibyte character.
+ */
+static void
+pm_regexp_parse_invalid_multibyte(pm_regexp_parser_t *parser, const uint8_t *cursor) {
+    const pm_encoding_t *encoding = parser->encoding;
+    if (*cursor < 0x80 || !parser->encoding_changed || !encoding->multibyte) return;
+    size_t width = encoding->char_width(cursor, (ptrdiff_t) (parser->end - cursor));
+    if (width == 0) {
+        parser->has_invalid_multibyte = true;
+        pm_regexp_parse_error_format(parser, cursor, cursor + 1, PM_ERR_INVALID_MULTIBYTE_CHAR, encoding->name);
+    } else if (width > 1) {
+        // The lead byte is already consumed; skip the remaining bytes.
+        parser->cursor += width - 1;
+    }
+}
+
+/**
  * match-char-set : '[' '^'? (match-range | match-char)* ']'
  *                ;
  */
@@ -293,12 +824,16 @@ pm_regexp_parse_character_set(pm_regexp_parser_t *parser, uint16_t depth) {
                 pm_regexp_parse_lbracket(parser, (uint16_t) (depth + 1));
                 break;
             case '\\':
-                if (!pm_regexp_char_is_eof(parser)) {
-                    parser->cursor++;
-                }
+                pm_regexp_parse_backslash_escape(parser);
                 break;
             default:
-                // do nothing, we've already advanced the cursor
+                // We've already advanced the cursor by one byte. If the byte
+                // was >= 0x80 in a multibyte encoding, we may need to consume
+                // additional continuation bytes and validate the character.
+                if (*(parser->cursor - 1) >= 0x80) {
+                    parser->non_ascii_literal_count++;
+                }
+                pm_regexp_parse_invalid_multibyte(parser, parser->cursor - 1);
                 break;
         }
     }
@@ -354,8 +889,13 @@ typedef enum {
 // These are the options that are configurable on the regular expression (or
 // from within a group).
 
+/** The minimum character value for a regexp option slot. */
 #define PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM 'a'
+
+/** The maximum character value for a regexp option slot. */
 #define PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM 'x'
+
+/** The number of regexp option slots. */
 #define PRISM_REGEXP_OPTION_STATE_SLOTS (PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM + 1)
 
 /**
@@ -498,7 +1038,15 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
                         }
 
                         size_t width = parser->encoding->char_width(parser->cursor, (ptrdiff_t) (parser->end - parser->cursor));
-                        if (width == 0) return false;
+                        if (width == 0) {
+                            if (*parser->cursor >= 0x80) {
+                                parser->has_invalid_multibyte = true;
+                                pm_regexp_parse_error_format(parser, parser->cursor, parser->cursor + 1, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
+                                parser->cursor++;
+                                continue;
+                            }
+                            return false;
+                        }
 
                         escaped = (width == 1) && (*parser->cursor == '\\');
                         parser->cursor += width;
@@ -686,9 +1234,7 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser, uint16_t depth) {
             return pm_regexp_parse_quantifier(parser);
         case '\\':
             parser->cursor++;
-            if (!pm_regexp_char_is_eof(parser)) {
-                parser->cursor++;
-            }
+            pm_regexp_parse_backslash_escape(parser);
             return pm_regexp_parse_quantifier(parser);
         case '(':
             parser->cursor++;
@@ -720,9 +1266,30 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser, uint16_t depth) {
                 width = parser->encoding->char_width(parser->cursor, (ptrdiff_t) (parser->end - parser->cursor));
             }
 
-            if (width == 0) return false; // TODO: add appropriate error
-            parser->cursor += width;
+            if (width == 0) {
+                if (*parser->cursor >= 0x80 && parser->encoding_changed) {
+                    if (parser->encoding->multibyte) {
+                        // Invalid multibyte character in a multibyte encoding.
+                        // Emit the error at the byte location immediately.
+                        parser->has_invalid_multibyte = true;
+                        pm_regexp_parse_error_format(parser, parser->cursor, parser->cursor + 1, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
+                    } else {
+                        // Non-ASCII byte in a single-byte encoding (e.g.,
+                        // US-ASCII). Count it for later error reporting.
+                        parser->non_ascii_literal_count++;
+                    }
+                    parser->cursor++;
+                    return pm_regexp_parse_quantifier(parser);
+                }
+                return false;
+            }
+
+            // Count non-ASCII literal bytes.
+            for (size_t i = 0; i < width; i++) {
+                if (parser->cursor[i] >= 0x80) parser->non_ascii_literal_count++;
+            }
 
+            parser->cursor += width;
             return pm_regexp_parse_quantifier(parser);
         }
     }
@@ -768,13 +1335,354 @@ pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
     return pm_regexp_char_is_eof(parser);
 }
 
+// ---------------------------------------------------------------------------
+// Encoding validation
+// ---------------------------------------------------------------------------
+
 /**
- * Parse a regular expression and extract the names of all of the named capture
- * groups.
+ * Validate that groups of hex escape bytes in the buffer form valid multibyte
+ * characters in the given encoding. Groups are separated by 0x00 sentinels.
+ */
+static bool
+pm_regexp_validate_hex_escapes(const pm_encoding_t *encoding, const pm_buffer_t *buffer) {
+    const uint8_t *data = (const uint8_t *) pm_buffer_value(buffer);
+    size_t len = pm_buffer_length(buffer);
+    size_t i = 0;
+    // Walk the buffer one sentinel-delimited group at a time.
+    while (i < len) {
+        size_t group_start = i;
+        while (i < len && data[i] != 0x00) i++; // find the end of this group
+        // Every character in [group_start, i) must have a non-zero width.
+        for (size_t j = group_start; j < i; ) {
+            size_t width = encoding->char_width(data + j, (ptrdiff_t) (i - j));
+            if (width == 0) return false;
+            j += width;
+        }
+
+        if (i < len) i++; // skip sentinel
+    }
+
+    return true;
+}
+
+/**
+ * Append regexp source content to the buffer for use in error messages. UTF-8
+ * sources are copied verbatim; otherwise bytes >= 0x80 are hex-escaped.
+ */
+static void
+pm_regexp_format_for_error(pm_buffer_t *buffer, const pm_encoding_t *encoding, const uint8_t *source, size_t length) {
+    size_t index = 0;
+    // UTF-8 content is appended verbatim, without any escaping.
+    if (encoding == PM_ENCODING_UTF_8_ENTRY) {
+        pm_buffer_append_string(buffer, (const char *) source, length);
+        return;
+    }
+    // Other encodings: ASCII bytes are copied, bytes >= 0x80 are escaped.
+    while (index < length) {
+        if (source[index] < 0x80) {
+            pm_buffer_append_byte(buffer, source[index]);
+            index++;
+        } else if (encoding->multibyte) {
+            size_t width = encoding->char_width(source + index, (ptrdiff_t) (length - index));
+            // A whole multibyte character is written as \x{...}; otherwise \xNN.
+            if (width > 1) {
+                pm_buffer_append_string(buffer, "\\x{", 3);
+                for (size_t i = 0; i < width; i++) {
+                    pm_buffer_append_format(buffer, "%02X", source[index + i]);
+                }
+                pm_buffer_append_byte(buffer, '}');
+                index += width;
+            } else {
+                pm_buffer_append_format(buffer, "\\x%02X", source[index]);
+                index++;
+            }
+        } else {
+            pm_buffer_append_format(buffer, "\\x%02X", source[index]);
+            index++;
+        }
+    }
+}
+
+/**
+ * Append a formatted encoding error covering the whole regexp node's span.
+ */
+#define PM_REGEXP_ENCODING_ERROR(parser, diag_id, ...) \
+    pm_diagnostic_list_append_format( \
+        &(parser)->parser->metadata_arena, \
+        &(parser)->parser->error_list, \
+        (uint32_t) ((parser)->node_start - (parser)->parser->start), \
+        (uint32_t) ((parser)->node_end - (parser)->node_start), \
+        diag_id, __VA_ARGS__)
+
+/**
+ * Validate encoding for a regexp with an encoding modifier (/e, /s, /u, /n).
+ * Returns flags unchanged, except that ASCII-only content under /n yields the
+ * forced US-ASCII flag instead.
+ *
+ * The decision tree is:
+ *
+ * 1. No escape-set encoding (explicit_encoding == NULL):
+ *    a. ASCII-only content: validate property escapes, return forced US-ASCII
+ *       for /n or the modifier flags for others.
+ *    b. US-ASCII source with non-ASCII literals: emit per-byte errors.
+ *    c. Source encoding differs from modifier encoding: emit mismatch error.
+ * 2. Mixed \u and \x escapes: emit the appropriate conflict error depending
+ *    on the modifier and which escape type was last.
+ * 3. Escape-set encoding is UTF-8 with a non-/u modifier: incompatible encoding
+ *    error if the last escape was \u or the source encoding is not UTF-8.
+ * 4. Unless /n: validate that hex escape byte sequences form valid multibyte
+ *    characters in the modifier's encoding.
+ */
+static pm_node_flags_t
+pm_regexp_validate_encoding_modifier(pm_regexp_parser_t *parser, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding, const char *source_start, int source_length) {
+
+    if (parser->explicit_encoding == NULL) {
+        if (ascii_only) {
+            // Check property escapes against the modifier's encoding tier.
+            // /n (ASCII-8BIT): only POSIX properties are valid.
+            // /e, /s: POSIX and script properties are valid.
+            // /u: all properties are valid.
+            if (modifier == 'n' && parser->property_name != NULL) {
+                PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_INVALID_CHAR_PROPERTY,
+                    (int) parser->property_name_length, (const char *) parser->property_name,
+                    source_length, source_start);
+            } else if (modifier != 'u' && parser->has_unicode_property_escape) {
+                PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_INVALID_CHAR_PROPERTY,
+                    (int) parser->unicode_property_name_length, (const char *) parser->unicode_property_name,
+                    source_length, source_start);
+            }
+            return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
+        }
+
+        if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
+            for (uint32_t i = 0; i < parser->non_ascii_literal_count; i++) {
+                PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
+            }
+        } else if (parser->encoding != modifier_encoding) {
+            PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
+            // ascii_only is always false here: that case returned above.
+            if (modifier == 'n') {
+                pm_buffer_t formatted = { 0 };
+                pm_regexp_format_for_error(&formatted, parser->encoding, (const uint8_t *) source_start, (size_t) source_length);
+                PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) formatted.length, (const char *) formatted.value);
+                pm_buffer_cleanup(&formatted);
+            }
+        }
+
+        return flags;
+    }
+
+    // Mixed unicode + hex escapes.
+    if (parser->has_unicode_escape && parser->has_hex_escape) {
+        if (modifier == 'n') {
+            if (parser->last_escape_was_unicode) {
+                PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP, source_length, source_start);
+            } else {
+                PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_ESCAPED_NON_ASCII_IN_UTF8, source_length, source_start);
+            }
+        } else {
+            if (!pm_regexp_validate_hex_escapes(modifier_encoding, &parser->hex_escape_buffer)) {
+                PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_INVALID_MULTIBYTE_ESCAPE, source_length, source_start);
+            }
+        }
+
+        return flags;
+    }
+
+    // With a non-/u modifier, an escape-set UTF-8 encoding is incompatible when
+    // the last escape was \u or the source encoding itself is not UTF-8.
+    if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+        if (parser->last_escape_was_unicode || parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
+            PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, source_length, source_start);
+        }
+    }
+
+    if (modifier != 'n' && !pm_regexp_validate_hex_escapes(modifier_encoding, &parser->hex_escape_buffer)) {
+        PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_INVALID_MULTIBYTE_ESCAPE, source_length, source_start);
+    }
+
+    return flags;
+}
+
+/**
+ * Validate the encoding of a regexp and compute the encoding flags to set on
+ * the node.
+ *
+ * The decision tree is:
+ *
+ * 1. Invalid multibyte chars (already reported during parsing) or an invalid
+ *    unicode range (reported here): return the flags, skipping other checks.
+ * 2. If a modifier (/n, /u, /e, /s) is present, delegate to
+ *    pm_regexp_validate_encoding_modifier.
+ * 3. US-ASCII source with non-ASCII literals: emit per-byte errors.
+ * 4. ASCII-only content: return forced US-ASCII (or forced UTF-8 if \p{...}).
+ * 5. Escape-set encoding present: validate hex escapes against the target
+ *    encoding, handle mixed \u + \x conflicts, and return the appropriate
+ *    forced encoding flag.
+ */
+static pm_node_flags_t
+pm_regexp_validate_encoding(pm_regexp_parser_t *parser, bool ascii_only, pm_node_flags_t flags, const char *source_start, int source_length) {
+
+    // Invalid multibyte characters suppress further validation.
+    // Errors were already emitted at the byte locations during parsing.
+    if (parser->has_invalid_multibyte) {
+        return flags;
+    }
+    // An invalid unicode range is reported here, once, over the whole node.
+    if (parser->invalid_unicode_range) {
+        PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, source_length, source_start);
+        return flags;
+    }
+
+    // Modifier flags are handled next, before any modifier-less checks.
+    if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
+        return pm_regexp_validate_encoding_modifier(parser, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY, source_start, source_length);
+    }
+    if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
+        return pm_regexp_validate_encoding_modifier(parser, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY, source_start, source_length);
+    }
+    if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
+        return pm_regexp_validate_encoding_modifier(parser, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY, source_start, source_length);
+    }
+    if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
+        return pm_regexp_validate_encoding_modifier(parser, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY, source_start, source_length);
+    }
+
+    // No modifier — check for non-ASCII literals in US-ASCII encoding.
+    if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
+        for (uint32_t i = 0; i < parser->non_ascii_literal_count; i++) {
+            PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
+        }
+    }
+
+    // ASCII-only regexps get downgraded to US-ASCII, unless property escapes
+    // force UTF-8.
+    if (ascii_only) {
+        if (parser->has_property_escape) {
+            return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
+        }
+        return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
+    }
+
+    // Check explicit encoding from escape sequences.
+    if (parser->explicit_encoding != NULL) {
+        // Mixed unicode + hex escapes without modifier.
+        if (parser->has_unicode_escape && parser->has_hex_escape && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
+            if (parser->encoding != PM_ENCODING_US_ASCII_ENTRY &&
+                parser->encoding != PM_ENCODING_ASCII_8BIT_ENTRY &&
+                !pm_regexp_validate_hex_escapes(parser->encoding, &parser->hex_escape_buffer)) {
+                PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_INVALID_MULTIBYTE_ESCAPE, source_length, source_start);
+            } else if (parser->last_escape_was_unicode) {
+                PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP, source_length, source_start);
+            } else {
+                PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_ESCAPED_NON_ASCII_IN_UTF8, source_length, source_start);
+            }
+
+            return 0;
+        }
+
+        if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+            if (!pm_regexp_validate_hex_escapes(parser->explicit_encoding, &parser->hex_escape_buffer)) {
+                PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_INVALID_MULTIBYTE_ESCAPE, source_length, source_start);
+            }
+
+            return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
+        } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
+            return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
+        } else {
+            if (!pm_regexp_validate_hex_escapes(parser->explicit_encoding, &parser->hex_escape_buffer)) {
+                PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_INVALID_MULTIBYTE_ESCAPE, source_length, source_start);
+            }
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Parse a regular expression, validate its encoding, and optionally extract
+ * named capture groups. Encoding validation walks the raw source (content_loc)
+ * to distinguish escape-produced bytes from literal bytes. Named capture
+ * extraction walks the unescaped content since escape sequences in group names
+ * (e.g., line continuations) have already been processed by the lexer.
+ */
+pm_node_flags_t
+pm_regexp_parse(pm_parser_t *parser, pm_regular_expression_node_t *node, pm_regexp_name_callback_t name_callback, pm_regexp_name_data_t *name_data) {
+    const uint8_t *source = parser->start + node->content_loc.start;
+    size_t size = node->content_loc.length;
+    bool extended_mode = PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED);
+    pm_node_flags_t flags = PM_NODE_FLAGS(node);
+    // Bounds of the whole node; encoding errors are reported over this span.
+    const uint8_t *node_start = parser->start + node->base.location.start;
+    const uint8_t *node_end = parser->start + node->base.location.start + node->base.location.length;
+
+    // First pass: walk raw source for encoding validation (no name extraction).
+    pm_regexp_parser_t regexp_parser = {
+        .parser = parser,
+        .start = source,
+        .cursor = source,
+        .end = source + size,
+        .extended_mode = extended_mode,
+        .encoding_changed = parser->encoding_changed,
+        .encoding = parser->encoding,
+        .name_callback = NULL,
+        .name_data = NULL,
+        .shared = true,
+        .node_start = node_start,
+        .node_end = node_end,
+        .has_unicode_escape = false,
+        .has_hex_escape = false,
+        .last_escape_was_unicode = false,
+        .explicit_encoding = NULL,
+        .has_property_escape = false,
+        .has_unicode_property_escape = false,
+        .property_name = NULL,
+        .property_name_length = 0,
+        .unicode_property_name = NULL,
+        .unicode_property_name_length = 0,
+        .non_ascii_literal_count = 0,
+        .invalid_unicode_range = false,
+        .hex_escape_buffer = { 0 },
+        .hex_group_active = false,
+        .has_invalid_multibyte = false,
+    };
+    // NOTE(review): the result of this pass is discarded; only state is kept.
+    pm_regexp_parse_pattern(&regexp_parser);
+
+    // Compute ascii_only from the regexp parser's tracked state. We cannot
+    // use node->unescaped for this because regexp unescaped content preserves
+    // escape text (e.g., \x80 is 4 ASCII chars), not the binary values.
+    bool ascii_only = !regexp_parser.has_hex_escape && !regexp_parser.has_unicode_escape && regexp_parser.non_ascii_literal_count == 0;
+    // Use the unescaped content for error messages to match CRuby's format,
+    // where Ruby escapes like \M-\C-? are resolved to bytes but regexp escapes
+    // like \u{80} are preserved as text.
+    const char *error_source = (const char *) pm_string_source(&node->unescaped);
+    int error_source_length = (int) pm_string_length(&node->unescaped);
+    pm_node_flags_t encoding_flags = pm_regexp_validate_encoding(&regexp_parser, ascii_only, flags, error_source, error_source_length);
+    pm_buffer_cleanup(&regexp_parser.hex_escape_buffer);
+
+    // Second pass: walk unescaped content for named capture extraction.
+    if (name_callback != NULL) {
+        bool shared = node->unescaped.type == PM_STRING_SHARED;
+        pm_regexp_parse_named_captures(parser, pm_string_source(&node->unescaped), pm_string_length(&node->unescaped), shared, extended_mode, name_callback, name_data);
+    }
+
+    return encoding_flags;
+}
+
+/**
+ * Parse an interpolated regular expression for named capture groups only.
+ * This is used for the =~ operator with interpolated regexps where we don't
+ * have a pm_regular_expression_node_t. No encoding validation is performed.
+ *
+ * Note: The encoding-tracking fields (has_unicode_escape, has_hex_escape, etc.)
+ * are initialized but not used for the result. They exist because the parsing
+ * functions (pm_regexp_parse_backslash_escape, etc.) unconditionally update
+ * them as they walk through the content.
  */
-PRISM_EXPORTED_FUNCTION void
-pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data) {
-    pm_regexp_parse_pattern(&(pm_regexp_parser_t) {
+void
+pm_regexp_parse_named_captures(pm_parser_t *parser, const uint8_t *source, size_t size, bool shared, bool extended_mode, pm_regexp_name_callback_t name_callback, pm_regexp_name_data_t *name_data) {
+    pm_regexp_parser_t regexp_parser = {
         .parser = parser,
         .start = source,
         .cursor = source,
@@ -784,7 +1692,26 @@ pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool ex
         .encoding = parser->encoding,
         .name_callback = name_callback,
         .name_data = name_data,
-        .error_callback = error_callback,
-        .error_data = error_data
-    });
+        .shared = shared,
+        .node_start = source,
+        .node_end = source + size,
+        .has_unicode_escape = false,
+        .has_hex_escape = false,
+        .last_escape_was_unicode = false,
+        .explicit_encoding = NULL,
+        .has_property_escape = false,
+        .has_unicode_property_escape = false,
+        .property_name = NULL,
+        .property_name_length = 0,
+        .unicode_property_name = NULL,
+        .unicode_property_name_length = 0,
+        .non_ascii_literal_count = 0,
+        .invalid_unicode_range = false,
+        .hex_escape_buffer = { 0 },
+        .hex_group_active = false,
+        .has_invalid_multibyte = false,
+    };
+
+    pm_regexp_parse_pattern(&regexp_parser);
+    pm_buffer_cleanup(&regexp_parser.hex_escape_buffer);
 }
diff --git a/prism/regexp.h b/prism/regexp.h
deleted file mode 100644
index c0b3163e93..0000000000
--- a/prism/regexp.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * @file regexp.h
- *
- * A regular expression parser.
- */
-#ifndef PRISM_REGEXP_H
-#define PRISM_REGEXP_H
-
-#include "prism/defines.h"
-#include "prism/parser.h"
-#include "prism/encoding.h"
-#include "prism/util/pm_memchr.h"
-#include "prism/util/pm_string.h"
-
-#include <stdbool.h>
-#include <stddef.h>
-#include <string.h>
-
-/**
- * This callback is called when a named capture group is found.
- */
-typedef void (*pm_regexp_name_callback_t)(const pm_string_t *name, void *data);
-
-/**
- * This callback is called when a parse error is found.
- */
-typedef void (*pm_regexp_error_callback_t)(const uint8_t *start, const uint8_t *end, const char *message, void *data);
-
-/**
- * Parse a regular expression.
- *
- * @param parser The parser that is currently being used.
- * @param source The source code to parse.
- * @param size The size of the source code.
- * @param extended_mode Whether to parse the regular expression in extended mode.
- * @param name_callback The optional callback to call when a named capture group is found.
- * @param name_data The optional data to pass to the name callback.
- * @param error_callback The callback to call when a parse error is found.
- * @param error_data The data to pass to the error callback.
- */
-PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data);
-
-#endif
diff --git a/prism/serialize.h b/prism/serialize.h
new file mode 100644
index 0000000000..786a1514bc
--- /dev/null
+++ b/prism/serialize.h
@@ -0,0 +1,96 @@
+/**
+ * @file serialize.h
+ *
+ * The functions related to serializing the AST to a binary format.
+ */
+#ifndef PRISM_SERIALIZE_H
+#define PRISM_SERIALIZE_H
+
+#include "prism/excludes.h"
+
+/* We optionally support serializing to a binary string. For systems that do not
+ * want or need this functionality, it can be turned off with the
+ * PRISM_EXCLUDE_SERIALIZATION define. */
+#ifndef PRISM_EXCLUDE_SERIALIZATION
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nonnull.h"
+
+#include "prism/buffer.h"
+#include "prism/parser.h"
+#include "prism/source.h"
+#include "prism/stream.h"
+
+/**
+ * Serialize the AST represented by the given node to the given buffer.
+ *
+ * @param parser The parser to serialize.
+ * @param node The node to serialize.
+ * @param buffer The buffer to serialize to.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) PRISM_NONNULL(1, 2, 3);
+
+/**
+ * Parse the given source to the AST and dump the AST to the given buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param data The optional data to pass to the parser.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) PRISM_NONNULL(1, 2);
+
+/**
+ * Parse and serialize the AST represented by the given source into the given
+ * buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param source The source to parse.
+ * @param data The optional data to pass to the parser.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, pm_source_t *source, const char *data) PRISM_NONNULL(1, 2);
+
+/**
+ * Parse and serialize the comments in the given source to the given buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param data The optional data to pass to the parser.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) PRISM_NONNULL(1, 2);
+
+/**
+ * Lex the given source and serialize to the given buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param source The source to lex.
+ * @param size The size of the source.
+ * @param data The optional data to pass to the lexer.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) PRISM_NONNULL(1, 2);
+
+/**
+ * Parse and serialize both the AST and the tokens represented by the given
+ * source to the given buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param data The optional data to pass to the parser.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) PRISM_NONNULL(1, 2);
+
+/**
+ * Parse the source and return true if it parses without errors or warnings.
+ *
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param data The optional data to pass to the parser.
+ * @returns True if the source parses without errors or warnings.
+ */
+PRISM_EXPORTED_FUNCTION bool pm_serialize_parse_success_p(const uint8_t *source, size_t size, const char *data) PRISM_NONNULL(1);
+
+#endif
+
+#endif
diff --git a/prism/source.c b/prism/source.c
new file mode 100644
index 0000000000..f61cb19c1b
--- /dev/null
+++ b/prism/source.c
@@ -0,0 +1,491 @@
+#include "prism/internal/source.h"
+
+#include "prism/internal/allocator.h"
+#include "prism/internal/buffer.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/* The following headers are necessary to read files using demand paging. */
+#ifdef _WIN32
+#include <windows.h>
+#elif defined(_POSIX_MAPPED_FILES)
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#elif defined(PRISM_HAS_FILESYSTEM)
+#include <fcntl.h>
+#include <sys/stat.h>
+#endif
+
+static const uint8_t empty_source[] = "";
+
+/**
+ * Allocate a pm_source_t holding the given fields; aborts if allocation fails.
+ */
+static pm_source_t *
+pm_source_alloc(const uint8_t *source, size_t length, pm_source_type_t type) {
+    pm_source_t *result = xmalloc(sizeof(pm_source_t));
+    if (result == NULL) abort();
+    /* Members not named here are zero-initialized by the compound literal. */
+    *result = (struct pm_source_t) {
+        .source = source,
+        .length = length,
+        .type = type
+    };
+
+    return result;
+}
+
+/**
+ * Create a new PM_SOURCE_CONSTANT source that points at the given memory.
+ */
+pm_source_t *
+pm_source_constant_new(const uint8_t *data, size_t length) {
+    return pm_source_alloc(data, length, PM_SOURCE_CONSTANT);
+}
+
+/**
+ * Create a new PM_SOURCE_SHARED source that points at the given memory.
+ */
+pm_source_t *
+pm_source_shared_new(const uint8_t *data, size_t length) {
+    return pm_source_alloc(data, length, PM_SOURCE_SHARED);
+}
+
+/**
+ * Create a new PM_SOURCE_OWNED source that points at the given memory.
+ */
+pm_source_t *
+pm_source_owned_new(uint8_t *data, size_t length) {
+    return pm_source_alloc(data, length, PM_SOURCE_OWNED);
+}
+
+#ifdef _WIN32
+/**
+ * Represents a file handle on Windows, where the path will need to be freed
+ * when the file is closed.
+ */
+typedef struct {
+    /** The wide-character path, allocated by pm_source_file_handle_open. */
+    WCHAR *path;
+
+    /** The size of the allocated path in bytes (passed to xfree_sized). */
+    size_t path_size;
+
+    /** The handle to the file, which will start as uninitialized memory. */
+    HANDLE file;
+} pm_source_file_handle_t;
+
+/**
+ * Open filepath (UTF-8) for reading on Windows; on failure the path is freed.
+ */
+static pm_source_init_result_t
+pm_source_file_handle_open(pm_source_file_handle_t *handle, const char *filepath) {
+    int length = MultiByteToWideChar(CP_UTF8, 0, filepath, -1, NULL, 0);
+    if (length == 0) return PM_SOURCE_INIT_ERROR_GENERIC;
+    /* Allocate the wide-character path and convert the UTF-8 filepath into it. */
+    handle->path_size = sizeof(WCHAR) * ((size_t) length);
+    handle->path = xmalloc(handle->path_size);
+    if ((handle->path == NULL) || (MultiByteToWideChar(CP_UTF8, 0, filepath, -1, handle->path, length) == 0)) {
+        xfree_sized(handle->path, handle->path_size);
+        return PM_SOURCE_INIT_ERROR_GENERIC;
+    }
+
+    handle->file = CreateFileW(handle->path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL);
+    if (handle->file == INVALID_HANDLE_VALUE) {
+        pm_source_init_result_t result = PM_SOURCE_INIT_ERROR_GENERIC;
+        /* Report a directory distinctly when access to the path was denied. */
+        if (GetLastError() == ERROR_ACCESS_DENIED) {
+            DWORD attributes = GetFileAttributesW(handle->path);
+            if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
+                result = PM_SOURCE_INIT_ERROR_DIRECTORY;
+            }
+        }
+
+        xfree_sized(handle->path, handle->path_size);
+        return result;
+    }
+
+    return PM_SOURCE_INIT_SUCCESS;
+}
+
+/**
+ * Free the handle's allocated path and close its file handle.
+ */
+static void
+pm_source_file_handle_close(pm_source_file_handle_t *handle) {
+    xfree_sized(handle->path, handle->path_size);
+    CloseHandle(handle->file);
+}
+#endif
+
+/**
+ * Create a new source by memory-mapping a file (plain read if unsupported).
+ */
+pm_source_t *
+pm_source_mapped_new(const char *filepath, int open_flags, pm_source_init_result_t *result) {
+#ifdef _WIN32
+    (void) open_flags;
+
+    /* Open the file for reading. */
+    pm_source_file_handle_t handle;
+    *result = pm_source_file_handle_open(&handle, filepath);
+    if (*result != PM_SOURCE_INIT_SUCCESS) return NULL;
+
+    /* Get the file size (low 32 bits only: no high-order DWORD is requested). */
+    DWORD file_size = GetFileSize(handle.file, NULL);
+    if (file_size == INVALID_FILE_SIZE) {
+        pm_source_file_handle_close(&handle);
+        *result = PM_SOURCE_INIT_ERROR_GENERIC;
+        return NULL;
+    }
+
+    /* If the file is empty, then return a constant source. */
+    if (file_size == 0) {
+        pm_source_file_handle_close(&handle);
+        *result = PM_SOURCE_INIT_SUCCESS;
+        return pm_source_alloc(empty_source, 0, PM_SOURCE_CONSTANT);
+    }
+
+    /* Create a mapping of the file. */
+    HANDLE mapping = CreateFileMapping(handle.file, NULL, PAGE_READONLY, 0, 0, NULL);
+    if (mapping == NULL) {
+        pm_source_file_handle_close(&handle);
+        *result = PM_SOURCE_INIT_ERROR_GENERIC;
+        return NULL;
+    }
+
+    /* Map the file into memory. */
+    uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
+    CloseHandle(mapping);
+    pm_source_file_handle_close(&handle);
+
+    if (source == NULL) {
+        *result = PM_SOURCE_INIT_ERROR_GENERIC;
+        return NULL;
+    }
+
+    *result = PM_SOURCE_INIT_SUCCESS;
+    return pm_source_alloc(source, (size_t) file_size, PM_SOURCE_MAPPED);
+#elif defined(_POSIX_MAPPED_FILES)
+    /* Open the file for reading. */
+    int fd = open(filepath, O_RDONLY | open_flags);
+    if (fd == -1) {
+        *result = PM_SOURCE_INIT_ERROR_GENERIC;
+        return NULL;
+    }
+
+    /* Stat the file to get the file size. */
+    struct stat sb;
+    if (fstat(fd, &sb) == -1) {
+        close(fd);
+        *result = PM_SOURCE_INIT_ERROR_GENERIC;
+        return NULL;
+    }
+
+    /* Ensure it is a file and not a directory. */
+    if (S_ISDIR(sb.st_mode)) {
+        close(fd);
+        *result = PM_SOURCE_INIT_ERROR_DIRECTORY;
+        return NULL;
+    }
+
+    /*
+     * For non-regular files (pipes, character devices), return a specific
+     * error so the caller can handle reading through their own I/O layer.
+     */
+    if (!S_ISREG(sb.st_mode)) {
+        close(fd);
+        *result = PM_SOURCE_INIT_ERROR_NON_REGULAR;
+        return NULL;
+    }
+
+    /* mmap the file descriptor to virtually get the contents. */
+    size_t size = (size_t) sb.st_size;
+
+    if (size == 0) {
+        close(fd);
+        *result = PM_SOURCE_INIT_SUCCESS;
+        return pm_source_alloc(empty_source, 0, PM_SOURCE_CONSTANT);
+    }
+
+    uint8_t *source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+    if (source == MAP_FAILED) {
+        close(fd);
+        *result = PM_SOURCE_INIT_ERROR_GENERIC;
+        return NULL;
+    }
+    /* The mapping stays valid after the descriptor is closed. */
+    close(fd);
+    *result = PM_SOURCE_INIT_SUCCESS;
+    return pm_source_alloc(source, size, PM_SOURCE_MAPPED);
+#else
+    (void) open_flags;
+    return pm_source_file_new(filepath, result);
+#endif
+}
+
+/**
+ * Create a new source by reading a file into a heap-allocated buffer.
+ */
+pm_source_t *
+pm_source_file_new(const char *filepath, pm_source_init_result_t *result) {
+#ifdef _WIN32
+    /* Open the file for reading. */
+    pm_source_file_handle_t handle;
+    *result = pm_source_file_handle_open(&handle, filepath);
+    if (*result != PM_SOURCE_INIT_SUCCESS) return NULL;
+
+    /* Get the file size. */
+    const DWORD file_size = GetFileSize(handle.file, NULL);
+    if (file_size == INVALID_FILE_SIZE) {
+        pm_source_file_handle_close(&handle);
+        *result = PM_SOURCE_INIT_ERROR_GENERIC;
+        return NULL;
+    }
+
+    /* If the file is empty, return a constant source. */
+    if (file_size == 0) {
+        pm_source_file_handle_close(&handle);
+        *result = PM_SOURCE_INIT_SUCCESS;
+        return pm_source_alloc(empty_source, 0, PM_SOURCE_CONSTANT);
+    }
+
+    /* Create a buffer to read the file into. */
+    uint8_t *source = xmalloc(file_size);
+    if (source == NULL) {
+        pm_source_file_handle_close(&handle);
+        *result = PM_SOURCE_INIT_ERROR_GENERIC;
+        return NULL;
+    }
+
+    /* Read the contents of the file. */
+    DWORD bytes_read;
+    if (!ReadFile(handle.file, source, file_size, &bytes_read, NULL)) {
+        xfree_sized(source, file_size);
+        pm_source_file_handle_close(&handle);
+        *result = PM_SOURCE_INIT_ERROR_GENERIC;
+        return NULL;
+    }
+
+    /* Check the number of bytes read. */
+    if (bytes_read != file_size) {
+        xfree_sized(source, file_size);
+        pm_source_file_handle_close(&handle);
+        *result = PM_SOURCE_INIT_ERROR_GENERIC;
+        return NULL;
+    }
+
+    pm_source_file_handle_close(&handle);
+    *result = PM_SOURCE_INIT_SUCCESS;
+    return pm_source_alloc(source, (size_t) file_size, PM_SOURCE_OWNED);
+#elif defined(PRISM_HAS_FILESYSTEM)
+    /* Open the file for reading. */
+    int fd = open(filepath, O_RDONLY);
+    if (fd == -1) {
+        *result = PM_SOURCE_INIT_ERROR_GENERIC;
+        return NULL;
+    }
+
+    /* Stat the file to get the file size. */
+    struct stat sb;
+    if (fstat(fd, &sb) == -1) {
+        close(fd);
+        *result = PM_SOURCE_INIT_ERROR_GENERIC;
+        return NULL;
+    }
+
+    /* Ensure it is a file and not a directory. */
+    if (S_ISDIR(sb.st_mode)) {
+        close(fd);
+        *result = PM_SOURCE_INIT_ERROR_DIRECTORY;
+        return NULL;
+    }
+
+    /* Check the size to see if it's empty. */
+    const size_t length = (size_t) sb.st_size;
+    if (length == 0) {
+        close(fd);
+        *result = PM_SOURCE_INIT_SUCCESS;
+        return pm_source_alloc(empty_source, 0, PM_SOURCE_CONSTANT);
+    }
+    uint8_t *source = xmalloc(length);
+    if (source == NULL) {
+        close(fd);
+        *result = PM_SOURCE_INIT_ERROR_GENERIC;
+        return NULL;
+    }
+
+    /* read() may return a short count, so loop until full, EOF, or error. */
+    size_t total = 0;
+    ssize_t chunk;
+    while (total < length && (chunk = read(fd, source + total, length - total)) > 0) total += (size_t) chunk;
+    close(fd);
+    if (total != length) {
+        xfree_sized(source, length);
+        *result = PM_SOURCE_INIT_ERROR_GENERIC;
+        return NULL;
+    }
+
+    *result = PM_SOURCE_INIT_SUCCESS;
+    return pm_source_alloc(source, length, PM_SOURCE_OWNED);
+#else
+    (void) filepath;
+    *result = PM_SOURCE_INIT_ERROR_GENERIC;
+    perror("pm_source_file_new is not implemented for this platform");
+    return NULL;
+#endif
+}
+
+/**
+ * Allocate a stream-backed source around the given handle and callbacks. No
+ * input is consumed here: the source gets a buffer from pm_buffer_new and a
+ * cleared eof flag, and pm_source_stream_read performs the actual reading.
+ */
+pm_source_t *
+pm_source_stream_new(void *stream, pm_source_stream_fgets_t *fgets, pm_source_stream_feof_t *feof) {
+    pm_source_t *created = pm_source_alloc(NULL, 0, PM_SOURCE_STREAM);
+
+    created->stream.stream = stream;
+    created->stream.fgets = fgets;
+    created->stream.feof = feof;
+    created->stream.eof = false;
+    created->stream.buffer = pm_buffer_new();
+    return created;
+}
+
+/**
+ * Read from the stream into the source's internal buffer until a line made up
+ * of the __END__ marker is encountered or EOF is reached, then publish the
+ * buffer's contents as the source pointer and length.
+ *
+ * Returns true if EOF was reached, false if __END__ was encountered.
+ */
+bool
+pm_source_stream_read(pm_source_t *source) {
+    pm_buffer_t *buffer = source->stream.buffer;
+
+    /* Whether the next chunk begins a new line (vs. continuing a long one). */
+    bool line_start = true;
+
+#define LINE_SIZE 4096
+    char line[LINE_SIZE];
+
+    while (memset(line, '\n', LINE_SIZE), source->stream.fgets(line, LINE_SIZE, source->stream.stream) != NULL) {
+        /*
+         * The array was pre-filled with newlines, so once the trailing ones
+         * are skipped the last byte left is the terminator that the fgets
+         * callback is expected to write, and every byte before it is data.
+         */
+        size_t length = LINE_SIZE;
+        while (length > 0 && line[length - 1] == '\n') length--;
+        const bool filled = (length == LINE_SIZE);
+
+        /* Drop the terminator and append the chunk to the buffer. */
+        length--;
+        pm_buffer_append_string(buffer, line, length);
+
+        /* Only a chunk that begins a line may be matched against __END__. */
+        const bool check_marker = line_start;
+        line_start = true;
+
+        if (filled) {
+            /*
+             * The chunk used up the whole array, so keep reading. Unless its
+             * last data byte is a newline, the line it belongs to carries on
+             * into the next chunk, which must not be mistaken for a line of
+             * its own.
+             */
+            line_start = (line[LINE_SIZE - 2] == '\n');
+            continue;
+        }
+
+        /*
+         * Check if the line matches the __END__ marker, with or without a
+         * line ending. If it does, then stop reading and return false. In
+         * most circumstances, this means we should stop reading from the
+         * stream so that the DATA constant can pick it up.
+         */
+        if (
+            check_marker && (
+                (length == 7 && strncmp(line, "__END__", 7) == 0) ||
+                (length == 8 && strncmp(line, "__END__\n", 8) == 0) ||
+                (length == 9 && strncmp(line, "__END__\r\n", 9) == 0)
+            )
+        ) {
+            source->source = (const uint8_t *) pm_buffer_value(buffer);
+            source->length = pm_buffer_length(buffer);
+            return false;
+        }
+
+        /* Ask the feof callback whether the stream is done; stop if so. */
+        if (source->stream.feof(source->stream.stream)) {
+            break;
+        }
+    }
+
+#undef LINE_SIZE
+
+    /* Reading is finished: record that and publish everything buffered. */
+    source->stream.eof = true;
+    source->source = (const uint8_t *) pm_buffer_value(buffer);
+    source->length = pm_buffer_length(buffer);
+    return true;
+}
+
+/**
+ * Returns the stream's eof flag: true once pm_source_stream_read hit EOF.
+ */
+bool
+pm_source_stream_eof(const pm_source_t *source) {
+    return source->stream.eof;
+}
+
+/**
+ * Release a source: first whatever backing storage its type says it holds,
+ * then the pm_source_t structure itself.
+ */
+void
+pm_source_free(pm_source_t *source) {
+    switch (source->type) {
+        case PM_SOURCE_OWNED:
+            xfree_sized((void *) source->source, source->length);
+            break;
+        case PM_SOURCE_STREAM:
+            pm_buffer_free(source->stream.buffer);
+            break;
+        case PM_SOURCE_MAPPED:
+            /* Unmapping is only attempted for a non-zero length. */
+            if (source->length > 0) {
+#if defined(_WIN32)
+                UnmapViewOfFile((void *) source->source);
+#elif defined(_POSIX_MAPPED_FILES)
+                munmap((void *) source->source, source->length);
+#endif
+            }
+            break;
+        case PM_SOURCE_CONSTANT:
+        case PM_SOURCE_SHARED:
+            /* The data is not released by the source; nothing to do. */
+            break;
+    }
+
+    xfree_sized(source, sizeof(pm_source_t));
+}
+
+/**
+ * Returns source->length: the number of bytes of data at pm_source_source.
+ */
+size_t
+pm_source_length(const pm_source_t *source) {
+    return source->length;
+}
+
+/**
+ * Returns source->source: a pointer to the first byte of the source data.
+ */
+const uint8_t *
+pm_source_source(const pm_source_t *source) {
+    return source->source;
+}
diff --git a/prism/source.h b/prism/source.h
new file mode 100644
index 0000000000..c79987d3fb
--- /dev/null
+++ b/prism/source.h
@@ -0,0 +1,148 @@
+/**
+ * @file source.h
+ *
+ * An opaque type representing the source code being parsed, regardless of
+ * origin (constant memory, file, memory-mapped file, or stream).
+ */
+#ifndef PRISM_SOURCE_H
+#define PRISM_SOURCE_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/filesystem.h"
+#include "prism/compiler/nodiscard.h"
+#include "prism/compiler/nonnull.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * An opaque type representing source code being parsed.
+ */
+typedef struct pm_source_t pm_source_t;
+
+/**
+ * This function is used to retrieve a line of input from a stream. It closely
+ * mirrors that of fgets so that fgets can be used as the default
+ * implementation.
+ */
+typedef char * (pm_source_stream_fgets_t)(char *string, int size, void *stream);
+
+/**
+ * This function is used to check whether a stream is at EOF. It closely mirrors
+ * that of feof so that feof can be used as the default implementation.
+ */
+typedef int (pm_source_stream_feof_t)(void *stream);
+
+/**
+ * Represents the result of initializing a source from a file.
+ */
+typedef enum {
+    /** Indicates that the source was successfully initialized. */
+    PM_SOURCE_INIT_SUCCESS = 0,
+
+    /**
+     * Indicates a generic error from a source init function, where the type
+     * of error should be read from `errno` or `GetLastError()`.
+     */
+    PM_SOURCE_INIT_ERROR_GENERIC = 1,
+
+    /**
+     * Indicates that the file that was attempted to be opened was a directory.
+     */
+    PM_SOURCE_INIT_ERROR_DIRECTORY = 2,
+
+    /**
+     * Indicates that the file is not a regular file (e.g. a pipe or character
+     * device) and the caller should handle reading it.
+     */
+    PM_SOURCE_INIT_ERROR_NON_REGULAR = 3
+} pm_source_init_result_t;
+
+/**
+ * Create a new source that wraps existing constant memory. The memory is not
+ * owned and will not be freed.
+ *
+ * @param data The pointer to the source data.
+ * @param length The length of the source data in bytes.
+ * @returns A new source. Aborts on allocation failure.
+ */
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_source_t * pm_source_constant_new(const uint8_t *data, size_t length);
+
+/**
+ * Create a new source that wraps existing shared memory. The memory is not
+ * owned and will not be freed. Semantically a "slice" of another source.
+ *
+ * @param data The pointer to the source data.
+ * @param length The length of the source data in bytes.
+ * @returns A new source. Aborts on allocation failure.
+ */
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_source_t * pm_source_shared_new(const uint8_t *data, size_t length);
+
+/**
+ * Create a new source that owns its memory. pm_source_free releases the data
+ * with xfree_sized, passing the length given here as its size.
+ *
+ * @param data The pointer to the heap-allocated source data.
+ * @param length The length of the source data in bytes.
+ * @returns A new source. Aborts on allocation failure.
+ */
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_source_t * pm_source_owned_new(uint8_t *data, size_t length);
+
+/**
+ * Create a new source by reading a file into a heap-allocated buffer.
+ *
+ * @param filepath The path to the file to read.
+ * @param result Out parameter for the result of the initialization.
+ * @returns A new source, or NULL on error (with result written to out param).
+ */
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_source_t * pm_source_file_new(const char *filepath, pm_source_init_result_t *result) PRISM_NONNULL(1, 2);
+
+/**
+ * Create a new source by memory-mapping a file. Falls back to file reading on
+ * platforms without mmap support.
+ *
+ * If the file is a non-regular file (e.g. a pipe or character device),
+ * PM_SOURCE_INIT_ERROR_NON_REGULAR is returned, allowing the caller to handle
+ * it appropriately (e.g. by reading it through their own I/O layer).
+ *
+ * @param filepath The path to the file to read.
+ * @param open_flags Additional flags to pass to open(2) (e.g. O_NONBLOCK).
+ * @param result Out parameter for the result of the initialization.
+ * @returns A new source, or NULL on error (with result written to out param).
+ */
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_source_t * pm_source_mapped_new(const char *filepath, int open_flags, pm_source_init_result_t *result) PRISM_NONNULL(1, 3);
+
+/**
+ * Create a new stream-backed source. Nothing is read from the stream here.
+ *
+ * @param stream The stream to read from.
+ * @param fgets The function to use to read from the stream.
+ * @param feof The function to use to check if the stream is at EOF.
+ * @returns A new source. Aborts on allocation failure.
+ */
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_source_t * pm_source_stream_new(void *stream, pm_source_stream_fgets_t *fgets, pm_source_stream_feof_t *feof);
+
+/**
+ * Free the given source and any memory it owns.
+ *
+ * @param source The source to free.
+ */
+PRISM_EXPORTED_FUNCTION void pm_source_free(pm_source_t *source) PRISM_NONNULL(1);
+
+/**
+ * Returns the length of the source data in bytes.
+ *
+ * @param source The source to get the length of.
+ * @returns The length of the source data.
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_source_length(const pm_source_t *source) PRISM_NONNULL(1);
+
+/**
+ * Returns a pointer to the source data.
+ *
+ * @param source The source to get the data of.
+ * @returns A pointer to the source data.
+ */
+PRISM_EXPORTED_FUNCTION const uint8_t * pm_source_source(const pm_source_t *source) PRISM_NONNULL(1);
+
+#endif
diff --git a/prism/srcs.mk b/prism/srcs.mk
new file mode 100644
index 0000000000..93ad8f579f
--- /dev/null
+++ b/prism/srcs.mk
@@ -0,0 +1,160 @@
+PRISM_TEMPLATES_DIR = $(PRISM_SRCDIR)/templates
+PRISM_TEMPLATE = $(PRISM_TEMPLATES_DIR)/template.rb
+PRISM_CONFIG = $(PRISM_SRCDIR)/config.yml
+
+srcs uncommon.mk: prism/.srcs.mk.time
+
+prism/.srcs.mk.time: $(order_only) $(PRISM_BUILD_DIR)/.time
+prism/$(HAVE_BASERUBY:no=.srcs.mk.time):
+	touch $@
+prism/$(HAVE_BASERUBY:yes=.srcs.mk.time): \
+		$(PRISM_SRCDIR)/templates/template.rb \
+		$(PRISM_SRCDIR)/srcs.mk.in
+	$(BASERUBY) $(tooldir)/generic_erb.rb -c -t$@ -o $(PRISM_SRCDIR)/srcs.mk $(PRISM_SRCDIR)/srcs.mk.in
+
+distclean-prism-srcs::
+	$(RM) prism/.srcs.mk.time
+	$(RMDIRS) prism || $(NULLCMD)
+
+distclean-srcs-local:: distclean-prism-srcs
+
+realclean-prism-srcs:: distclean-prism-srcs
+	$(RM) $(PRISM_SRCDIR)/srcs.mk
+
+realclean-srcs-local:: realclean-prism-srcs
+
+main srcs: prism-srcs
+main incs: prism-incs
+
+prism-srcs: $(srcdir)/prism/api_node.c
+$(srcdir)/prism/api_node.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/ext/prism/api_node.c.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) ext/prism/api_node.c $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/prism/api_node.c
+
+prism-incs: $(srcdir)/prism/ast.h
+$(srcdir)/prism/ast.h: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/include/prism/ast.h.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) include/prism/ast.h $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/prism/ast.h
+
+prism-incs: $(srcdir)/prism/internal/diagnostic.h
+$(srcdir)/prism/internal/diagnostic.h: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/include/prism/internal/diagnostic.h.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) include/prism/internal/diagnostic.h $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/prism/internal/diagnostic.h
+
+prism-srcs: $(srcdir)/lib/prism/compiler.rb
+$(srcdir)/lib/prism/compiler.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/compiler.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/compiler.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/compiler.rb
+
+prism-srcs: $(srcdir)/lib/prism/dispatcher.rb
+$(srcdir)/lib/prism/dispatcher.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/dispatcher.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/dispatcher.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/dispatcher.rb
+
+prism-srcs: $(srcdir)/lib/prism/dot_visitor.rb
+$(srcdir)/lib/prism/dot_visitor.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/dot_visitor.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/dot_visitor.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/dot_visitor.rb
+
+prism-srcs: $(srcdir)/lib/prism/dsl.rb
+$(srcdir)/lib/prism/dsl.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/dsl.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/dsl.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/dsl.rb
+
+prism-srcs: $(srcdir)/lib/prism/inspect_visitor.rb
+$(srcdir)/lib/prism/inspect_visitor.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/inspect_visitor.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/inspect_visitor.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/inspect_visitor.rb
+
+prism-srcs: $(srcdir)/lib/prism/mutation_compiler.rb
+$(srcdir)/lib/prism/mutation_compiler.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/mutation_compiler.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/mutation_compiler.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/mutation_compiler.rb
+
+prism-srcs: $(srcdir)/lib/prism/node.rb
+$(srcdir)/lib/prism/node.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/node.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/node.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/node.rb
+
+prism-srcs: $(srcdir)/lib/prism/reflection.rb
+$(srcdir)/lib/prism/reflection.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/reflection.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/reflection.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/reflection.rb
+
+prism-srcs: $(srcdir)/lib/prism/serialize.rb
+$(srcdir)/lib/prism/serialize.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/serialize.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/serialize.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/serialize.rb
+
+prism-srcs: $(srcdir)/lib/prism/visitor.rb
+$(srcdir)/lib/prism/visitor.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/visitor.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/visitor.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/visitor.rb
+
+prism-srcs: $(srcdir)/prism/diagnostic.c
+$(srcdir)/prism/diagnostic.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/diagnostic.c.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/diagnostic.c $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/prism/diagnostic.c
+
+prism-srcs: $(srcdir)/prism/json.c
+$(srcdir)/prism/json.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/json.c.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/json.c $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/prism/json.c
+
+prism-srcs: $(srcdir)/prism/node.c
+$(srcdir)/prism/node.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/node.c.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/node.c $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/prism/node.c
+
+prism-srcs: $(srcdir)/prism/prettyprint.c
+$(srcdir)/prism/prettyprint.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/prettyprint.c.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/prettyprint.c $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/prism/prettyprint.c
+
+prism-srcs: $(srcdir)/prism/serialize.c
+$(srcdir)/prism/serialize.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/serialize.c.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/serialize.c $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/prism/serialize.c
+
+prism-srcs: $(srcdir)/prism/tokens.c
+$(srcdir)/prism/tokens.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/tokens.c.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/tokens.c $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/prism/tokens.c
diff --git a/prism/srcs.mk.in b/prism/srcs.mk.in
new file mode 100644
index 0000000000..6149e4ae9d
--- /dev/null
+++ b/prism/srcs.mk.in
@@ -0,0 +1,52 @@
+<% # -*- ruby -*-
+# :stopdoc:
+require_relative 'templates/template'
+
+script = File.basename(__FILE__)
+srcs = output ? File.basename(output) : script.chomp('.in')
+mk = 'uncommon.mk'
+
+# %>
+PRISM_TEMPLATES_DIR = $(PRISM_SRCDIR)/templates
+PRISM_TEMPLATE = $(PRISM_TEMPLATES_DIR)/template.rb
+PRISM_CONFIG = $(PRISM_SRCDIR)/config.yml
+
+srcs <%=%><%=mk%>: prism/.srcs.mk.time
+
+prism/.srcs.mk.time: $(order_only) $(PRISM_BUILD_DIR)/.time
+prism/$(HAVE_BASERUBY:no=.srcs.mk.time):
+	touch $@
+prism/$(HAVE_BASERUBY:yes=.srcs.mk.time): \
+		$(PRISM_SRCDIR)/templates/template.rb \
+		$(PRISM_SRCDIR)/<%=%><%=script%>
+	$(BASERUBY) $(tooldir)/generic_erb.rb -c -t$@ -o $(PRISM_SRCDIR)/<%=%><%=srcs%> $(PRISM_SRCDIR)/<%=%><%=script%>
+
+distclean-prism-srcs::
+	$(RM) prism/.srcs.mk.time
+	$(RMDIRS) prism || $(NULLCMD)
+
+distclean-srcs-local:: distclean-prism-srcs
+
+realclean-prism-srcs:: distclean-prism-srcs
+	$(RM) $(PRISM_SRCDIR)/<%=%><%=srcs%>
+
+realclean-srcs-local:: realclean-prism-srcs
+
+main srcs: prism-srcs
+main incs: prism-incs
+<% Prism::Template::TEMPLATES.map do |t|
+  /\.(?:[ch]|rb)\z/ =~ t or next # only .c, .h and .rb templates get a rule
+  s = '$(srcdir)/' + t.sub(%r[\A(?:(src)|ext|include)/]) {$1 && 'prism/'} # src/ becomes prism/; ext/ and include/ are dropped
+  s.sub!(%r[\A\$(srcdir)/prism/], '$(PRISM_SRCDIR)/') # NOTE(review): the unescaped parens make (srcdir) a group, so this never matches "$(srcdir)/prism/" - confirm intent
+  target = s.end_with?('.h') ? 'incs' : 'srcs' # headers hang off prism-incs, everything else off prism-srcs
+# %>
+
+prism-<%=%><%=target%>: <%=%><%=s%>
+<%=%><%=s%>: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/<%=%><%=t%>.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) <%=%><%=t%> $@
+
+realclean-prism-srcs::
+	$(RM) <%=%><%=s%>
+<%
+end
+# %>
diff --git a/prism/static_literals.c b/prism/static_literals.c
index 9fa37b999a..9af1eadf5d 100644
--- a/prism/static_literals.c
+++ b/prism/static_literals.c
@@ -1,4 +1,18 @@
-#include "prism/static_literals.h"
+#include "prism/internal/static_literals.h"
+
+#include "prism/compiler/inline.h"
+#include "prism/compiler/unused.h"
+
+#include "prism/internal/allocator.h"
+#include "prism/internal/buffer.h"
+#include "prism/internal/integer.h"
+#include "prism/internal/isinf.h"
+#include "prism/internal/stringy.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
 
 /**
  * A small struct used for passing around a subset of the information that is
@@ -7,7 +21,10 @@
  */
 typedef struct {
     /** The list of newline offsets to use to calculate line numbers. */
-    const pm_newline_list_t *newline_list;
+    const pm_line_offset_list_t *line_offsets;
+
+    /** The start of the source being parsed. */
+    const uint8_t *start;
 
     /** The line number that the parser starts on. */
     int32_t start_line;
@@ -16,7 +33,7 @@ typedef struct {
     const char *encoding_name;
 } pm_static_literals_metadata_t;
 
-static inline uint32_t
+static PRISM_INLINE uint32_t
 murmur_scramble(uint32_t value) {
     value *= 0xcc9e2d51;
     value = (value << 15) | (value >> 17);
@@ -92,7 +109,7 @@ node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node)
         }
         case PM_SOURCE_LINE_NODE: {
             // Source lines hash their line number.
-            const pm_line_column_t line_column = pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line);
+            const pm_line_column_t line_column = pm_line_offset_list_line_column(metadata->line_offsets, node->location.start, metadata->start_line);
             const int32_t *value = &line_column.line;
             return murmur_hash((const uint8_t *) value, sizeof(int32_t));
         }
@@ -180,7 +197,7 @@ pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *m
         }
 
         // Finally, free the old node list and update the hash.
-        xfree(hash->nodes);
+        xfree_sized(hash->nodes, hash->capacity * sizeof(pm_node_t *));
         hash->nodes = new_nodes;
         hash->capacity = new_capacity;
     }
@@ -218,7 +235,7 @@ pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *m
  */
 static void
 pm_node_hash_free(pm_node_hash_t *hash) {
-    if (hash->capacity > 0) xfree(hash->nodes);
+    if (hash->capacity > 0) xfree_sized(hash->nodes, hash->capacity * sizeof(pm_node_t *));
 }
 
 /**
@@ -240,7 +257,7 @@ pm_int64_value(const pm_static_literals_metadata_t *metadata, const pm_node_t *n
             return integer->negative ? -value : value;
         }
         case PM_SOURCE_LINE_NODE:
-            return (int64_t) pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line).line;
+            return (int64_t) pm_line_offset_list_line_column(metadata->line_offsets, node->location.start, metadata->start_line).line;
         default:
             assert(false && "unreachable");
             return 0;
@@ -268,7 +285,7 @@ pm_compare_integer_nodes(const pm_static_literals_metadata_t *metadata, const pm
  * A comparison function for comparing two FloatNode instances.
  */
 static int
-pm_compare_float_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
+pm_compare_float_nodes(PRISM_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
     const double left_value = ((const pm_float_node_t *) left)->value;
     const double right_value = ((const pm_float_node_t *) right)->value;
     return PM_NUMERIC_COMPARISON(left_value, right_value);
@@ -327,7 +344,7 @@ pm_string_value(const pm_node_t *node) {
  * A comparison function for comparing two nodes that have attached strings.
  */
 static int
-pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
+pm_compare_string_nodes(PRISM_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
     const pm_string_t *left_string = pm_string_value(left);
     const pm_string_t *right_string = pm_string_value(right);
     return pm_string_compare(left_string, right_string);
@@ -337,7 +354,7 @@ pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata
  * A comparison function for comparing two RegularExpressionNode instances.
  */
 static int
-pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
+pm_compare_regular_expression_nodes(PRISM_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
     const pm_regular_expression_node_t *left_regexp = (const pm_regular_expression_node_t *) left;
     const pm_regular_expression_node_t *right_regexp = (const pm_regular_expression_node_t *) right;
 
@@ -353,14 +370,15 @@ pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_liter
  * Add a node to the set of static literals.
  */
 pm_node_t *
-pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace) {
+pm_static_literals_add(const pm_line_offset_list_t *line_offsets, const uint8_t *start, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace) {
     switch (PM_NODE_TYPE(node)) {
         case PM_INTEGER_NODE:
         case PM_SOURCE_LINE_NODE:
             return pm_node_hash_insert(
                 &literals->integer_nodes,
                 &(pm_static_literals_metadata_t) {
-                    .newline_list = newline_list,
+                    .line_offsets = line_offsets,
+                    .start = start,
                     .start_line = start_line,
                     .encoding_name = NULL
                 },
@@ -372,7 +390,8 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
             return pm_node_hash_insert(
                 &literals->float_nodes,
                 &(pm_static_literals_metadata_t) {
-                    .newline_list = newline_list,
+                    .line_offsets = line_offsets,
+                    .start = start,
                     .start_line = start_line,
                     .encoding_name = NULL
                 },
@@ -385,7 +404,8 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
             return pm_node_hash_insert(
                 &literals->number_nodes,
                 &(pm_static_literals_metadata_t) {
-                    .newline_list = newline_list,
+                    .line_offsets = line_offsets,
+                    .start = start,
                     .start_line = start_line,
                     .encoding_name = NULL
                 },
@@ -398,7 +418,8 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
             return pm_node_hash_insert(
                 &literals->string_nodes,
                 &(pm_static_literals_metadata_t) {
-                    .newline_list = newline_list,
+                    .line_offsets = line_offsets,
+                    .start = start,
                     .start_line = start_line,
                     .encoding_name = NULL
                 },
@@ -410,7 +431,8 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
             return pm_node_hash_insert(
                 &literals->regexp_nodes,
                 &(pm_static_literals_metadata_t) {
-                    .newline_list = newline_list,
+                    .line_offsets = line_offsets,
+                    .start = start,
                     .start_line = start_line,
                     .encoding_name = NULL
                 },
@@ -422,7 +444,8 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
             return pm_node_hash_insert(
                 &literals->symbol_nodes,
                 &(pm_static_literals_metadata_t) {
-                    .newline_list = newline_list,
+                    .line_offsets = line_offsets,
+                    .start = start,
                     .start_line = start_line,
                     .encoding_name = NULL
                 },
@@ -492,7 +515,7 @@ pm_static_literal_positive_p(const pm_node_t *node) {
 /**
  * Create a string-based representation of the given static literal.
  */
-static inline void
+static PRISM_INLINE void
 pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
     switch (PM_NODE_TYPE(node)) {
         case PM_FALSE_NODE:
@@ -502,12 +525,12 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
             const double value = ((const pm_float_node_t *) node)->value;
 
             if (PRISM_ISINF(value)) {
-                if (*node->location.start == '-') {
+                if (metadata->start[node->location.start] == '-') {
                     pm_buffer_append_byte(buffer, '-');
                 }
                 pm_buffer_append_string(buffer, "Infinity", 8);
             } else if (value == 0.0) {
-                if (*node->location.start == '-') {
+                if (metadata->start[node->location.start] == '-') {
                     pm_buffer_append_byte(buffer, '-');
                 }
                 pm_buffer_append_string(buffer, "0.0", 3);
@@ -576,7 +599,7 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
             break;
         }
         case PM_SOURCE_LINE_NODE:
-            pm_buffer_append_format(buffer, "%d", pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line).line);
+            pm_buffer_append_format(buffer, "%d", pm_line_offset_list_line_column(metadata->line_offsets, node->location.start, metadata->start_line).line);
             break;
         case PM_STRING_NODE: {
             const pm_string_t *unescaped = &((const pm_string_node_t *) node)->unescaped;
@@ -604,11 +627,12 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
  * Create a string-based representation of the given static literal.
  */
 void
-pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node) {
+pm_static_literal_inspect(pm_buffer_t *buffer, const pm_line_offset_list_t *line_offsets, const uint8_t *start, int32_t start_line, const char *encoding_name, const pm_node_t *node) {
     pm_static_literal_inspect_node(
         buffer,
         &(pm_static_literals_metadata_t) {
-            .newline_list = newline_list,
+            .line_offsets = line_offsets,
+            .start = start,
             .start_line = start_line,
             .encoding_name = encoding_name
         },
diff --git a/prism/stream.h b/prism/stream.h
new file mode 100644
index 0000000000..678322b442
--- /dev/null
+++ b/prism/stream.h
@@ -0,0 +1,28 @@
+/**
+ * @file stream.h
+ *
+ * Functions for parsing streams.
+ */
+#ifndef PRISM_STREAM_H
+#define PRISM_STREAM_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nonnull.h"
+
+#include "prism/arena.h"
+#include "prism/options.h"
+#include "prism/parser.h"
+#include "prism/source.h"
+
+/**
+ * Parse a stream of Ruby source and return the tree.
+ *
+ * @param parser The out parameter to write the parser to.
+ * @param arena The arena to use for all AST-lifetime allocations.
+ * @param source The source to use, created via pm_source_stream_new.
+ * @param options The optional options to use when parsing.
+ * @returns The AST representing the source.
+ */
+PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_source_t *source, const pm_options_t *options) PRISM_NONNULL(1, 2, 3);
+
+#endif
diff --git a/prism/string_query.c b/prism/string_query.c
new file mode 100644
index 0000000000..ccedaf9c00
--- /dev/null
+++ b/prism/string_query.c
@@ -0,0 +1,166 @@
+#include "prism/string_query.h"
+
+#include "prism/internal/char.h"
+#include "prism/internal/encoding.h"
+
+#include <assert.h>
+#include <string.h>
+
+/** The category of slice returned from pm_slice_type. */
+typedef enum {
+    /** Returned when pm_encoding_find does not recognize the encoding name. */
+    PM_SLICE_TYPE_ERROR = -1,
+
+    /** Returned when the slice is empty or is not a well-formed identifier. */
+    PM_SLICE_TYPE_NONE,
+
+    /** Returned for an identifier whose first character is not uppercase. */
+    PM_SLICE_TYPE_LOCAL,
+
+    /** Returned for an identifier whose first character is uppercase. */
+    PM_SLICE_TYPE_CONSTANT,
+
+    /** Returned for an identifier followed by a trailing !, ?, or =. */
+    PM_SLICE_TYPE_METHOD_NAME
+} pm_slice_type_t;
+
+/**
+ * Classify the slice as a local, constant, or method name (or none/error).
+ */
+static pm_slice_type_t
+pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
+    // first, get the right encoding object
+    const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
+    if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
+
+    // check that there is at least one character
+    if (length == 0) return PM_SLICE_TYPE_NONE;
+
+    size_t width;
+    if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
+        // valid because alphabetical
+    } else if (*source == '_') {
+        // valid because underscore
+        width = 1;
+    } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
+        // valid because multibyte
+    } else {
+        // invalid because no match
+        return PM_SLICE_TYPE_NONE;
+    }
+
+    // determine the type of the slice based on the first character
+    const uint8_t *end = source + length;
+    pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
+
+    // next, iterate through all of the bytes of the string to ensure that they
+    // are all valid identifier characters
+    source += width;
+
+    while (source < end) {
+        if ((width = encoding->alnum_char(source, end - source)) != 0) {
+            // valid because alphanumeric
+            source += width;
+        } else if (*source == '_') {
+            // valid because underscore
+            source++;
+        } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
+            // valid because multibyte
+            source += width;
+        } else {
+            // invalid because no match
+            break;
+        }
+    }
+
+    // accept a trailing !, ?, or = as a method name, but never read past end
+    if (source < end && (*source == '!' || *source == '?' || *source == '=')) {
+        source++;
+        result = PM_SLICE_TYPE_METHOD_NAME;
+    }
+
+    // valid if we are at the end of the slice
+    return source == end ? result : PM_SLICE_TYPE_NONE;
+}
+
+/**
+ * Check that the slice is a valid local variable name.
+ */
+pm_string_query_t
+pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
+    const pm_slice_type_t kind = pm_slice_type(source, length, encoding_name);
+
+    // An unrecognized encoding is reported as an error, not as "false".
+    if (kind == PM_SLICE_TYPE_ERROR) {
+        return PM_STRING_QUERY_ERROR;
+    }
+
+    // Only a slice classified as a local answers true; constants, method
+    // names, and unclassifiable slices all answer false.
+    if (kind == PM_SLICE_TYPE_LOCAL) {
+        return PM_STRING_QUERY_TRUE;
+    }
+    return PM_STRING_QUERY_FALSE;
+}
+
+/**
+ * Check that the slice is a valid constant name.
+ */
+pm_string_query_t
+pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
+    const pm_slice_type_t kind = pm_slice_type(source, length, encoding_name);
+
+    // An unrecognized encoding is reported as an error, not as "false".
+    if (kind == PM_SLICE_TYPE_ERROR) {
+        return PM_STRING_QUERY_ERROR;
+    }
+
+    // Only a slice classified as a constant answers true; locals, method
+    // names, and unclassifiable slices all answer false.
+    if (kind == PM_SLICE_TYPE_CONSTANT) {
+        return PM_STRING_QUERY_TRUE;
+    }
+    return PM_STRING_QUERY_FALSE;
+}
+
+/**
+ * Check that the slice is a valid method name: an identifier or an operator.
+ */
+pm_string_query_t
+pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
+#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
+#define C1(c) (*source == c)
+#define C2(s) (memcmp(source, s, 2) == 0)
+#define C3(s) (memcmp(source, s, 3) == 0)
+
+    switch (pm_slice_type(source, length, encoding_name)) {
+        case PM_SLICE_TYPE_ERROR:
+            return PM_STRING_QUERY_ERROR;
+        case PM_SLICE_TYPE_NONE:
+            break; // not an identifier; try the operator spellings below
+        case PM_SLICE_TYPE_LOCAL:
+            // numbered parameters (_1 through _9) are not valid method names
+            return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
+        case PM_SLICE_TYPE_CONSTANT:
+            // all constants are valid method names
+        case PM_SLICE_TYPE_METHOD_NAME:
+            // all method names are valid method names
+            return PM_STRING_QUERY_TRUE;
+    }
+
+    switch (length) { // C1/C2/C3 compare exactly 1/2/3 bytes, matching the case
+        case 1:
+            return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
+        case 2:
+            return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
+        case 3:
+            return B(C3("===") || C3("<=>") || C3("[]="));
+        default:
+            return PM_STRING_QUERY_FALSE;
+    }
+
+#undef B
+#undef C1
+#undef C2
+#undef C3
+}
diff --git a/prism/string_query.h b/prism/string_query.h
new file mode 100644
index 0000000000..6ee1a9d9b6
--- /dev/null
+++ b/prism/string_query.h
@@ -0,0 +1,63 @@
+/**
+ * @file string_query.h
+ *
+ * Functions for querying properties of strings, such as whether they are valid
+ * local variable names, constant names, or method names.
+ */
+#ifndef PRISM_STRING_QUERY_H
+#define PRISM_STRING_QUERY_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nonnull.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * Represents the results of a string query.
+ */
+typedef enum {
+    /** Returned if the encoding given to a string query was invalid. */
+    PM_STRING_QUERY_ERROR = -1,
+
+    /** Returned if the result of the string query is false. */
+    PM_STRING_QUERY_FALSE,
+
+    /** Returned if the result of the string query is true. */
+    PM_STRING_QUERY_TRUE
+} pm_string_query_t;
+
+/**
+ * Check that the slice is a valid local variable name.
+ *
+ * @param source The source to check.
+ * @param length The length of the source.
+ * @param encoding_name The name of the encoding of the source.
+ * @returns PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
+ *   the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) PRISM_NONNULL(1, 3);
+
+/**
+ * Check that the slice is a valid constant name.
+ *
+ * @param source The source to check.
+ * @param length The length of the source.
+ * @param encoding_name The name of the encoding of the source.
+ * @returns PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
+ *   the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) PRISM_NONNULL(1, 3);
+
+/**
+ * Check that the slice is a valid method name.
+ *
+ * @param source The source to check.
+ * @param length The length of the source.
+ * @param encoding_name The name of the encoding of the source.
+ * @returns PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
+ *   the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) PRISM_NONNULL(1, 3);
+
+#endif
diff --git a/prism/stringy.c b/prism/stringy.c
new file mode 100644
index 0000000000..d6f4c4a777
--- /dev/null
+++ b/prism/stringy.c
@@ -0,0 +1,91 @@
+#include "prism/internal/stringy.h"
+
+#include "prism/internal/allocator.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * Initialize a shared string that borrows the bytes in [start, end).
+ */
+void
+pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end) {
+    assert(start <= end); // an empty slice (start == end) is allowed
+
+    *string = (pm_string_t) {
+        .type = PM_STRING_SHARED,
+        .source = start,
+        .length = (size_t) (end - start)
+    };
+}
+
+/**
+ * Initialize an owned string; pm_string_cleanup frees source for this type.
+ */
+void
+pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) {
+    *string = (pm_string_t) {
+        .type = PM_STRING_OWNED,
+        .source = source,
+        .length = length
+    };
+}
+
+/**
+ * Initialize a constant string that borrows source and never frees it.
+ */
+void
+pm_string_constant_init(pm_string_t *string, const char *source, size_t length) {
+    *string = (pm_string_t) {
+        .type = PM_STRING_CONSTANT,
+        .source = (const uint8_t *) source,
+        .length = length
+    };
+}
+
+/**
+ * Order two strings first by length and then, for strings of equal length, by
+ * their bytes. The result is 0 when the strings are equal, negative when the
+ * left string sorts before the right string, and positive when the left string
+ * sorts after the right string.
+ */
+int
+pm_string_compare(const pm_string_t *left, const pm_string_t *right) {
+    const size_t lhs_size = pm_string_length(left);
+    const size_t rhs_size = pm_string_length(right);
+
+    // Strings of different sizes are ordered by size alone; the bytes are
+    // only consulted when both strings are the same size.
+    if (lhs_size != rhs_size) {
+        return (lhs_size < rhs_size) ? -1 : 1;
+    }
+
+    return memcmp(pm_string_source(left), pm_string_source(right), lhs_size);
+}
+
+/**
+ * Returns the length of the string in bytes.
+ */
+size_t
+pm_string_length(const pm_string_t *string) {
+    return string->length;
+}
+
+/**
+ * Returns a pointer to the first byte of the string.
+ */
+const uint8_t *
+pm_string_source(const pm_string_t *string) {
+    return string->source;
+}
+
+/**
+ * Free the string's memory, but only when its type is PM_STRING_OWNED.
+ */
+void
+pm_string_cleanup(pm_string_t *string) {
+    if (string->type == PM_STRING_OWNED) {
+        xfree_sized((void *) string->source, string->length); // cast drops const
+    }
+}
diff --git a/prism/stringy.h b/prism/stringy.h
new file mode 100644
index 0000000000..0d64387ac3
--- /dev/null
+++ b/prism/stringy.h
@@ -0,0 +1,72 @@
+/**
+ * @file stringy.h
+ *
+ * A generic string type that can have various ownership semantics.
+ */
+#ifndef PRISM_STRINGY_H
+#define PRISM_STRINGY_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nonnull.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * A generic string type that can have various ownership semantics.
+ */
+typedef struct {
+    /** A pointer to the start of the string. */
+    const uint8_t *source;
+
+    /** The length of the string in bytes of memory. */
+    size_t length;
+
+    /** The type of the string. This field determines how the string should be freed. */
+    enum {
+        /** This string is a constant string, and should not be freed. */
+        PM_STRING_CONSTANT,
+
+        /** This is a slice of another string, and should not be freed. */
+        PM_STRING_SHARED,
+
+        /** This string owns its memory, and should be freed internally. */
+        PM_STRING_OWNED
+    } type;
+} pm_string_t;
+
+/**
+ * Initialize a constant string that doesn't own its memory source.
+ *
+ * @param string The string to initialize.
+ * @param source The source of the string.
+ * @param length The length of the string.
+ */
+PRISM_EXPORTED_FUNCTION void pm_string_constant_init(pm_string_t *string, const char *source, size_t length) PRISM_NONNULL(1);
+
+/**
+ * Initialize an owned string that is responsible for freeing allocated memory.
+ *
+ * @param string The string to initialize.
+ * @param source The source of the string.
+ * @param length The length of the string.
+ */
+PRISM_EXPORTED_FUNCTION void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) PRISM_NONNULL(1, 2);
+
+/**
+ * Returns the length associated with the string.
+ *
+ * @param string The string to get the length of.
+ * @returns The length of the string.
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string) PRISM_NONNULL(1);
+
+/**
+ * Returns the start pointer associated with the string.
+ *
+ * @param string The string to get the start pointer of.
+ * @returns The start pointer of the string.
+ */
+PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string) PRISM_NONNULL(1);
+
+#endif
diff --git a/prism/util/pm_strncasecmp.c b/prism/strncasecmp.c
index 3f58421554..a373cad6d7 100644
--- a/prism/util/pm_strncasecmp.c
+++ b/prism/strncasecmp.c
@@ -1,11 +1,12 @@
-#include "prism/util/pm_strncasecmp.h"
+#include "prism/internal/strncasecmp.h"
+
+#include "prism/compiler/inline.h"
 
 /**
  * A locale-insensitive version of `tolower(3)`
  */
-static inline int
-pm_tolower(int c)
-{
+static PRISM_INLINE int
+pm_tolower(int c) {
     if ('A' <= c && c <= 'Z') {
         return c | 0x20;
     }
diff --git a/prism/strpbrk.c b/prism/strpbrk.c
new file mode 100644
index 0000000000..383707eb72
--- /dev/null
+++ b/prism/strpbrk.c
@@ -0,0 +1,439 @@
+#include "prism/internal/strpbrk.h"
+
+#include "prism/compiler/accel.h"
+#include "prism/compiler/inline.h"
+#include "prism/compiler/unused.h"
+
+#include "prism/internal/bit.h"
+#include "prism/internal/diagnostic.h"
+#include "prism/internal/encoding.h"
+#include "prism/internal/parser.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <string.h>
+
+/**
+ * Report length invalid multibyte bytes at offset start from parser->start.
+ */
+static PRISM_INLINE void
+pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, uint32_t start, uint32_t length) {
+    pm_diagnostic_list_append_format(&parser->metadata_arena, &parser->error_list, start, length, PM_ERR_INVALID_MULTIBYTE_CHARACTER, parser->start[start]);
+}
+
+/**
+ * Set explicit_encoding to the current encoding, erroring on a UTF-8 conflict.
+ */
+static PRISM_INLINE void
+pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, uint32_t start, uint32_t length) {
+    if (parser->explicit_encoding != NULL) {
+        if (parser->explicit_encoding == parser->encoding) {
+            // Okay, we already locked to this encoding.
+        } else if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+            // Not okay, we already found a Unicode escape sequence and this
+            // conflicts. The error covers length bytes at offset start.
+            pm_diagnostic_list_append_format(&parser->metadata_arena, &parser->error_list, start, length, PM_ERR_MIXED_ENCODING, parser->encoding->name);
+        } else {
+            // Should not be anything else.
+            assert(false && "unreachable");
+        }
+    }
+
+    parser->explicit_encoding = parser->encoding;
+}
+
+/**
+ * Scan forward through ASCII bytes looking for a byte that is in the given
+ * charset. Returns true if a match was found, storing its offset in *index.
+ * Returns false if no match was found, storing the number of ASCII bytes
+ * consumed in *index (so the caller can skip past them).
+ *
+ * All charset characters must be ASCII (< 0x80). The scanner stops at non-ASCII
+ * bytes, returning control to the caller's encoding-aware loop.
+ *
+ * Up to three optimized implementations are selected at compile time, with a
+ * no-op fallback for unsupported platforms:
+ *   1. NEON — processes 16 bytes per iteration on aarch64.
+ *   2. SSSE3 — processes 16 bytes per iteration on x86-64.
+ *   3. SWAR — little-endian fallback, processes 8 bytes per iteration.
+ */
+
+#if defined(PRISM_HAS_NEON) || defined(PRISM_HAS_SSSE3) || defined(PRISM_HAS_SWAR)
+
+/**
+ * Update the cached strpbrk lookup tables if the charset has changed. The
+ * parser caches the last charset's precomputed tables so that repeated calls
+ * with the same breakpoints (the common case during string/regex/list lexing)
+ * skip table construction entirely.
+ *
+ * Builds three structures:
+ *   - low_lut/high_lut: nibble-based lookup tables for SIMD matching (NEON/SSSE3)
+ *   - table: 256-bit bitmap for scalar fallback matching (all platforms)
+ */
+static PRISM_INLINE void
+pm_strpbrk_cache_update(pm_parser_t *parser, const uint8_t *charset) {
+    // The cache key is the full charset buffer (PM_STRPBRK_CACHE_SIZE bytes),
+    // so charset must be readable and NUL-padded for that many bytes. The
+    // fixed-size comparison then covers both content and length.
+    if (memcmp(parser->strpbrk_cache.charset, charset, sizeof(parser->strpbrk_cache.charset)) == 0) return;
+
+    memset(parser->strpbrk_cache.low_lut, 0, sizeof(parser->strpbrk_cache.low_lut));
+    memset(parser->strpbrk_cache.high_lut, 0, sizeof(parser->strpbrk_cache.high_lut));
+    memset(parser->strpbrk_cache.table, 0, sizeof(parser->strpbrk_cache.table));
+
+    // Always include NUL in the tables. The slow path uses strchr, which
+    // always matches NUL (it finds the C string terminator), so NUL is
+    // effectively always a breakpoint. Replicating that here lets the fast
+    // scanner handle NUL at full speed instead of bailing to the slow path.
+    parser->strpbrk_cache.low_lut[0x00] |= (uint8_t) (1 << 0);
+    parser->strpbrk_cache.high_lut[0x00] = (uint8_t) (1 << 0);
+    parser->strpbrk_cache.table[0] |= (uint64_t) 1;
+
+    size_t charset_len = 0;
+    for (const uint8_t *c = charset; *c != '\0'; c++) {
+        parser->strpbrk_cache.low_lut[*c & 0x0F] |= (uint8_t) (1 << (*c >> 4)); // slot = low nibble, bit = high nibble
+        parser->strpbrk_cache.high_lut[*c >> 4] = (uint8_t) (1 << (*c >> 4)); // value depends only on the slot, so = is enough
+        parser->strpbrk_cache.table[*c >> 6] |= (uint64_t) 1 << (*c & 0x3F); // one bit per byte value
+        charset_len++;
+    }
+
+    // Store the new charset key, NUL-padded to the full buffer size.
+    memcpy(parser->strpbrk_cache.charset, charset, charset_len + 1);
+    memset(parser->strpbrk_cache.charset + charset_len + 1, 0, sizeof(parser->strpbrk_cache.charset) - charset_len - 1);
+}
+
+#endif
+
+#if defined(PRISM_HAS_NEON)
+#include <arm_neon.h>
+
+static PRISM_INLINE bool
+scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) {
+    pm_strpbrk_cache_update(parser, charset);
+
+    uint8x16_t low_lut = vld1q_u8(parser->strpbrk_cache.low_lut);
+    uint8x16_t high_lut = vld1q_u8(parser->strpbrk_cache.high_lut);
+    uint8x16_t mask_0f = vdupq_n_u8(0x0F);
+    uint8x16_t mask_80 = vdupq_n_u8(0x80);
+
+    size_t idx = 0;
+
+    while (idx + 16 <= maximum) {
+        uint8x16_t v = vld1q_u8(source + idx);
+
+        // If any byte has the high bit set, we have non-ASCII data. Leave
+        // the vector loop; the scalar tail consumes the ASCII prefix.
+        if (vmaxvq_u8(vandq_u8(v, mask_80)) != 0) break;
+
+        uint8x16_t lo_class = vqtbl1q_u8(low_lut, vandq_u8(v, mask_0f));
+        uint8x16_t hi_class = vqtbl1q_u8(high_lut, vshrq_n_u8(v, 4));
+        uint8x16_t matched = vtstq_u8(lo_class, hi_class);
+
+        if (vmaxvq_u8(matched) == 0) {
+            idx += 16;
+            continue;
+        }
+
+        // Matched lanes are all-ones, so ctz / 8 is the first matching byte.
+        uint64_t lo64 = vgetq_lane_u64(vreinterpretq_u64_u8(matched), 0);
+        if (lo64 != 0) {
+            *index = idx + pm_ctzll(lo64) / 8;
+            return true;
+        }
+        uint64_t hi64 = vgetq_lane_u64(vreinterpretq_u64_u8(matched), 1);
+        *index = idx + 8 + pm_ctzll(hi64) / 8;
+        return true;
+    }
+
+    // Scalar tail: the last < 16 bytes, or the ASCII prefix of a mixed chunk.
+    while (idx < maximum && source[idx] < 0x80) {
+        uint8_t byte = source[idx];
+        if (parser->strpbrk_cache.table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) {
+            *index = idx;
+            return true;
+        }
+        idx++;
+    }
+
+    *index = idx;
+    return false;
+}
+
+#elif defined(PRISM_HAS_SSSE3)
+#include <tmmintrin.h>
+
+static PRISM_INLINE bool
+scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) {
+    pm_strpbrk_cache_update(parser, charset);
+
+    __m128i low_lut = _mm_loadu_si128((const __m128i *) parser->strpbrk_cache.low_lut);
+    __m128i high_lut = _mm_loadu_si128((const __m128i *) parser->strpbrk_cache.high_lut);
+    __m128i mask_0f = _mm_set1_epi8(0x0F);
+
+    size_t idx = 0;
+
+    while (idx + 16 <= maximum) {
+        __m128i v = _mm_loadu_si128((const __m128i *) (source + idx));
+
+        // If any byte has the high bit set, leave the loop for the scalar tail.
+        if (_mm_movemask_epi8(v) != 0) break;
+
+        // Nibble-based classification using pshufb (SSSE3), same as NEON
+        // vqtbl1q_u8. A byte matches iff (low_lut[lo_nib] & high_lut[hi_nib]) != 0.
+        __m128i lo_class = _mm_shuffle_epi8(low_lut, _mm_and_si128(v, mask_0f));
+        __m128i hi_class = _mm_shuffle_epi8(high_lut, _mm_and_si128(_mm_srli_epi16(v, 4), mask_0f));
+        __m128i matched = _mm_and_si128(lo_class, hi_class);
+
+        // One mask bit per byte, set where that byte did NOT match.
+        int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(matched, _mm_setzero_si128()));
+
+        if (mask == 0xFFFF) {
+            // All bytes were zero — no match in this chunk.
+            idx += 16;
+            continue;
+        }
+
+        // Find the first matching byte (lowest clear bit in mask).
+        *index = idx + pm_ctzll((uint64_t) (~mask & 0xFFFF));
+        return true;
+    }
+
+    // Scalar tail: the last < 16 bytes, or the ASCII prefix of a mixed chunk.
+    while (idx < maximum && source[idx] < 0x80) {
+        uint8_t byte = source[idx];
+        if (parser->strpbrk_cache.table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) {
+            *index = idx;
+            return true;
+        }
+        idx++;
+    }
+
+    *index = idx;
+    return false;
+}
+
+#elif defined(PRISM_HAS_SWAR)
+
+static PRISM_INLINE bool
+scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) {
+    pm_strpbrk_cache_update(parser, charset);
+
+    static const uint64_t highs = 0x8080808080808080ULL; // high bit of every byte
+    size_t idx = 0;
+
+    while (idx + 8 <= maximum) {
+        uint64_t word;
+        memcpy(&word, source + idx, 8);
+
+        // Leave the loop on any non-ASCII byte; the scalar tail takes over.
+        if (word & highs) break;
+
+        // All 8 bytes are ASCII; check each one against the charset table.
+        for (size_t j = 0; j < 8; j++) {
+            uint8_t byte = source[idx + j];
+            if (parser->strpbrk_cache.table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) {
+                *index = idx + j;
+                return true;
+            }
+        }
+
+        idx += 8;
+    }
+
+    // Scalar tail: the last < 8 bytes, or the ASCII prefix of a mixed word.
+    while (idx < maximum && source[idx] < 0x80) {
+        uint8_t byte = source[idx];
+        if (parser->strpbrk_cache.table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) {
+            *index = idx;
+            return true;
+        }
+        idx++;
+    }
+
+    *index = idx;
+    return false;
+}
+
+#else
+
+static PRISM_INLINE bool
+scan_strpbrk_ascii(PRISM_UNUSED pm_parser_t *parser, PRISM_UNUSED const uint8_t *source, PRISM_UNUSED size_t maximum, PRISM_UNUSED const uint8_t *charset, size_t *index) {
+    *index = 0; // nothing consumed, so the caller scans from the start
+    return false; // no accelerated scanner on this platform
+}
+
+#endif
+
+/**
+ * Default path (encoding not changed): resumes at index, UTF-8 widths.
+ */
+static PRISM_INLINE const uint8_t *
+pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) {
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        if (source[index] < 0x80) {
+            index++;
+        } else {
+            size_t width = pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index));
+
+            if (width > 0) {
+                index += width;
+            } else if (!validate) {
+                index++;
+            } else {
+                // At this point we know we have an invalid multibyte character.
+                // We'll walk forward as far as we can until we find the next
+                // valid character so that we don't spam the user with a ton of
+                // the same kind of error.
+                const size_t start = index;
+
+                do {
+                    index++;
+                } while (index < maximum && pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+
+                pm_strpbrk_invalid_multibyte_character(parser, (uint32_t) ((source + start) - parser->start), (uint32_t) (index - start));
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/**
+ * ASCII-8BIT path: a high byte locks the encoding, reported at that byte.
+ */
+static PRISM_INLINE const uint8_t *
+pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) {
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        if (validate && source[index] >= 0x80) pm_strpbrk_explicit_encoding_set(parser, (uint32_t) ((source + index) - parser->start), 1);
+        index++;
+    }
+
+    return NULL;
+}
+
+/**
+ * Slow, encoding-aware path; diagnostics are anchored at the offending bytes.
+ */
+static PRISM_INLINE const uint8_t *
+pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) {
+    const pm_encoding_t *encoding = parser->encoding;
+
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        if (source[index] < 0x80) {
+            index++;
+        } else {
+            size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
+            if (validate) pm_strpbrk_explicit_encoding_set(parser, (uint32_t) ((source + index) - parser->start), (uint32_t) width);
+
+            if (width > 0) {
+                index += width;
+            } else if (!validate) {
+                index++;
+            } else {
+                // At this point we know we have an invalid multibyte character.
+                // We'll walk forward as far as we can until we find the next
+                // valid character so that we don't spam the user with a ton of
+                // the same kind of error.
+                const size_t start = index;
+
+                do {
+                    index++;
+                } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+
+                pm_strpbrk_invalid_multibyte_character(parser, (uint32_t) ((source + start) - parser->start), (uint32_t) (index - start));
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/**
+ * Fast path for single-byte encodings. High bytes are only inspected when
+ * validating, and diagnostics are anchored at the offending byte.
+ */
+static PRISM_INLINE const uint8_t *
+pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) {
+    const pm_encoding_t *encoding = parser->encoding;
+
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        if (source[index] < 0x80 || !validate) {
+            index++;
+        } else {
+            size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
+            pm_strpbrk_explicit_encoding_set(parser, (uint32_t) ((source + index) - parser->start), (uint32_t) width);
+
+            if (width > 0) {
+                index += width;
+            } else {
+                // At this point we know we have an invalid multibyte character.
+                // We'll walk forward as far as we can until we find the next
+                // valid character so that we don't spam the user with a ton of
+                // the same kind of error.
+                const size_t start = index;
+
+                do {
+                    index++;
+                } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+
+                pm_strpbrk_invalid_multibyte_character(parser, (uint32_t) ((source + start) - parser->start), (uint32_t) (index - start));
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/**
+ * Here we have rolled our own version of strpbrk. The standard library strpbrk
+ * has undefined behavior when the source string is not null-terminated. We want
+ * to support strings that are not null-terminated because pm_parse does not
+ * have the contract that the string is null-terminated. (This is desirable
+ * because it means the extension can call pm_parse with the result of a call to
+ * mmap).
+ *
+ * The standard library strpbrk also does not support passing a maximum length
+ * to search. We want to support this for the reason mentioned above, and we
+ * cannot treat a null byte as the end of the input because Ruby allows null
+ * bytes within strings, comments, regular expressions, etc. A null byte is
+ * still returned as a match, since strchr always finds the charset terminator.
+ *
+ * Finally, we want to support encodings wherein the charset could contain
+ * characters that are trailing bytes of multi-byte characters. For example, in
+ * Shift_JIS, the backslash character can be a trailing byte. In that case we
+ * need to take a slower path and iterate one multi-byte character at a time.
+ */
+const uint8_t *
+pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate) {
+    if (length <= 0) return NULL;
+
+    size_t maximum = (size_t) length;
+    size_t index = 0; // on a fast-path miss, the count of ASCII bytes already cleared
+    if (scan_strpbrk_ascii(parser, source, maximum, charset, &index)) return source + index;
+
+    if (!parser->encoding_changed) {
+        return pm_strpbrk_utf8(parser, source, charset, index, maximum, validate);
+    } else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
+        return pm_strpbrk_ascii_8bit(parser, source, charset, index, maximum, validate);
+    } else if (parser->encoding->multibyte) {
+        return pm_strpbrk_multi_byte(parser, source, charset, index, maximum, validate);
+    } else {
+        return pm_strpbrk_single_byte(parser, source, charset, index, maximum, validate);
+    }
+}
diff --git a/prism/templates/ext/prism/api_node.c.erb b/prism/templates/ext/prism/api_node.c.erb
index 23af8886a7..41d7165930 100644
--- a/prism/templates/ext/prism/api_node.c.erb
+++ b/prism/templates/ext/prism/api_node.c.erb
@@ -1,5 +1,9 @@
 #line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
 #include "prism/extension.h"
+#include "prism/internal/allocator.h"
+#include "prism/internal/arena.h"
+
+#include <assert.h>
 
 extern VALUE rb_cPrism;
 extern VALUE rb_cPrismNode;
@@ -12,25 +16,20 @@ static VALUE rb_cPrism<%= node.name %>;
 <%- end -%>
 
 static VALUE
-pm_location_new(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end, VALUE source, bool freeze) {
+pm_location_new(const uint32_t start, const uint32_t length, VALUE source, bool freeze) {
     if (freeze) {
-        VALUE location_argv[] = {
-            source,
-            LONG2FIX(start - parser->start),
-            LONG2FIX(end - start)
-        };
-
+        VALUE location_argv[] = { source, LONG2FIX(start), LONG2FIX(length) };
         return rb_obj_freeze(rb_class_new_instance(3, location_argv, rb_cPrismLocation));
     } else {
-        uint64_t value = ((((uint64_t) (start - parser->start)) << 32) | ((uint32_t) (end - start)));
+        uint64_t value = ((((uint64_t) start) << 32) | ((uint64_t) length));
         return ULL2NUM(value);
     }
 }
 
 VALUE
 pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source, bool freeze) {
-    ID type = rb_intern(pm_token_type_name(token->type));
-    VALUE location = pm_location_new(parser, token->start, token->end, source, freeze);
+    ID type = rb_intern(pm_token_type(token->type));
+    VALUE location = pm_location_new((uint32_t) (token->start - pm_parser_start(parser)), (uint32_t) (token->end - token->start), source, freeze);
 
     VALUE slice = rb_enc_str_new((const char *) token->start, token->end - token->start, encoding);
     if (freeze) rb_obj_freeze(slice);
@@ -79,19 +78,25 @@ pm_integer_new(const pm_integer_t *integer) {
 // Create a Prism::Source object from the given parser, after pm_parse() was called.
 VALUE
 pm_source_new(const pm_parser_t *parser, rb_encoding *encoding, bool freeze) {
-    VALUE source_string = rb_enc_str_new((const char *) parser->start, parser->end - parser->start, encoding);
+    const uint8_t *start = pm_parser_start(parser);
+    VALUE source_string = rb_enc_str_new((const char *) start, pm_parser_end(parser) - start, encoding);
 
-    VALUE offsets = rb_ary_new_capa(parser->newline_list.size);
-    for (size_t index = 0; index < parser->newline_list.size; index++) {
-        rb_ary_push(offsets, ULONG2NUM(parser->newline_list.offsets[index]));
-    }
+    const pm_line_offset_list_t *line_offsets = pm_parser_line_offsets(parser);
+    VALUE offsets;
 
     if (freeze) {
+        offsets = rb_ary_new_capa(line_offsets->size);
+        for (size_t index = 0; index < line_offsets->size; index++) {
+            rb_ary_push(offsets, ULONG2NUM(line_offsets->offsets[index]));
+        }
+
         rb_obj_freeze(source_string);
         rb_obj_freeze(offsets);
+    } else {
+        offsets = rb_str_new((const char *) line_offsets->offsets, line_offsets->size * sizeof(uint32_t));
     }
 
-    VALUE source = rb_funcall(rb_cPrismSource, rb_intern("for"), 3, source_string, LONG2NUM(parser->start_line), offsets);
+    VALUE source = rb_funcall(rb_cPrismSource, rb_intern("for"), 3, source_string, LONG2NUM(pm_parser_start_line(parser)), offsets);
     if (freeze) rb_obj_freeze(source);
 
     return source;
@@ -104,8 +109,8 @@ typedef struct pm_node_stack_node {
 } pm_node_stack_node_t;
 
 static void
-pm_node_stack_push(pm_node_stack_node_t **stack, const pm_node_t *visit) {
-    pm_node_stack_node_t *node = xmalloc(sizeof(pm_node_stack_node_t));
+pm_node_stack_push(pm_arena_t *arena, pm_node_stack_node_t **stack, const pm_node_t *visit) {
+    pm_node_stack_node_t *node = (pm_node_stack_node_t *) pm_arena_alloc(arena, sizeof(pm_node_stack_node_t), PRISM_ALIGNOF(pm_node_stack_node_t));
     node->prev = *stack;
     node->visit = visit;
     node->visited = false;
@@ -118,32 +123,40 @@ pm_node_stack_pop(pm_node_stack_node_t **stack) {
     const pm_node_t *visit = current->visit;
 
     *stack = current->prev;
-    xfree(current);
 
     return visit;
 }
 
-VALUE
-pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source, bool freeze) {
-    VALUE constants = rb_ary_new_capa(parser->constant_pool.size);
-
-    for (uint32_t index = 0; index < parser->constant_pool.size; index++) {
-        pm_constant_t *constant = &parser->constant_pool.constants[index];
-        int state = 0;
+typedef struct {
+    VALUE constants;
+    rb_encoding *encoding;
+} pm_ast_constants_each_data_t;
 
-        VALUE string = rb_enc_str_new((const char *) constant->start, constant->length, encoding);
-        VALUE value = rb_protect(rb_str_intern, string, &state);
+static void
+pm_ast_constants_each(const pm_constant_t *constant, void *data) {
+    pm_ast_constants_each_data_t *constants_data = (pm_ast_constants_each_data_t *) data;
+    int state = 0;
 
-        if (state != 0) {
-            value = ID2SYM(rb_intern_const("?"));
-            rb_set_errinfo(Qnil);
-        }
+    VALUE string = rb_enc_str_new((const char *) pm_constant_start(constant), pm_constant_length(constant), constants_data->encoding);
+    VALUE value = rb_protect(rb_str_intern, string, &state);
 
-        rb_ary_push(constants, value);
+    if (state != 0) {
+        value = ID2SYM(rb_intern_const("?"));
+        rb_set_errinfo(Qnil);
     }
 
+    rb_ary_push(constants_data->constants, value);
+}
+
+VALUE
+pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source, bool freeze) {
+    VALUE constants = rb_ary_new_capa(pm_parser_constants_size(parser));
+    pm_ast_constants_each_data_t constants_data = { .constants = constants, .encoding = encoding };
+    pm_parser_constants_each(parser, pm_ast_constants_each, &constants_data);
+
+    pm_arena_t *node_arena = pm_arena_new();
     pm_node_stack_node_t *node_stack = NULL;
-    pm_node_stack_push(&node_stack, node);
+    pm_node_stack_push(node_arena, &node_stack, node);
     VALUE value_stack = rb_ary_new();
 
     while (node_stack != NULL) {
@@ -166,10 +179,10 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
                     <%- node.fields.each do |field| -%>
                     <%- case field -%>
                     <%- when Prism::Template::NodeField, Prism::Template::OptionalNodeField -%>
-                    pm_node_stack_push(&node_stack, (pm_node_t *) cast-><%= field.name %>);
+                    pm_node_stack_push(node_arena, &node_stack, (pm_node_t *) cast-><%= field.name %>);
                     <%- when Prism::Template::NodeListField -%>
                     for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
-                        pm_node_stack_push(&node_stack, (pm_node_t *) cast-><%= field.name %>.nodes[index]);
+                        pm_node_stack_push(node_arena, &node_stack, (pm_node_t *) cast-><%= field.name %>.nodes[index]);
                     }
                     <%- end -%>
                     <%- end -%>
@@ -200,7 +213,7 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
                     argv[1] = ULONG2NUM(node->node_id);
 
                     // location
-                    argv[2] = pm_location_new(parser, node->location.start, node->location.end, source, freeze);
+                    argv[2] = pm_location_new(node->location.start, node->location.length, source, freeze);
 
                     // flags
                     argv[3] = ULONG2NUM(node->flags);
@@ -237,10 +250,10 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
                     if (freeze) rb_obj_freeze(argv[<%= index %>]);
                     <%- when Prism::Template::LocationField -%>
 #line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
-                    argv[<%= index %>] = pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source, freeze);
+                    argv[<%= index %>] = pm_location_new(cast-><%= field.name %>.start, cast-><%= field.name %>.length, source, freeze);
                     <%- when Prism::Template::OptionalLocationField -%>
 #line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
-                    argv[<%= index %>] = cast-><%= field.name %>.start == NULL ? Qnil : pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source, freeze);
+                    argv[<%= index %>] = cast-><%= field.name %>.length == 0 ? Qnil : pm_location_new(cast-><%= field.name %>.start, cast-><%= field.name %>.length, source, freeze);
                     <%- when Prism::Template::UInt8Field -%>
 #line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
                     argv[<%= index %>] = UINT2NUM(cast-><%= field.name %>);
@@ -271,6 +284,7 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
         }
     }
 
+    pm_arena_free(node_arena);
     return rb_ary_pop(value_stack);
 }
 
diff --git a/prism/templates/include/prism/ast.h.erb b/prism/templates/include/prism/ast.h.erb
index 751c0b43c2..3b3be25e76 100644
--- a/prism/templates/include/prism/ast.h.erb
+++ b/prism/templates/include/prism/ast.h.erb
@@ -2,16 +2,20 @@
  * @file ast.h
  *
  * The abstract syntax tree.
+ *
+ * --
  */
 #ifndef PRISM_AST_H
 #define PRISM_AST_H
 
-#include "prism/defines.h"
-#include "prism/util/pm_constant_pool.h"
-#include "prism/util/pm_integer.h"
-#include "prism/util/pm_string.h"
+#include "prism/compiler/align.h"
+#include "prism/compiler/exported.h"
+
+#include "prism/arena.h"
+#include "prism/constant_pool.h"
+#include "prism/integer.h"
+#include "prism/stringy.h"
 
-#include <assert.h>
 #include <stddef.h>
 #include <stdint.h>
 
@@ -20,7 +24,7 @@
  */
 typedef enum pm_token_type {
 <%- tokens.each do |token| -%>
-    /** <%= token.comment %> */
+    /** <%= Prism::Template::Doxygen.verbatim(token.comment) %> */
     PM_TOKEN_<%= token.name %><%= " = #{token.value}" if token.value %>,
 
 <%- end -%>
@@ -44,15 +48,28 @@ typedef struct {
 } pm_token_t;
 
 /**
- * This represents a range of bytes in the source string to which a node or
- * token corresponds.
+ * Returns a string representation of the given token type.
+ *
+ * @param token_type The type of the token to get the string representation of.
+ * @returns A string representation of the given token type. This is meant for
+ *     debugging purposes and is not guaranteed to be stable across versions.
+ */
+PRISM_EXPORTED_FUNCTION const char * pm_token_type(pm_token_type_t token_type);
+
+/**
+ * This struct represents a slice in the source code, defined by an offset and
+ * a length. Note that we have confirmation that we can represent all locations
+ * within Ruby source files using 32-bit integers per:
+ *
+ *     https://bugs.ruby-lang.org/issues/20488#note-1
+ *
  */
 typedef struct {
-    /** A pointer to the start location of the range in the source. */
-    const uint8_t *start;
+    /** The offset of the location from the start of the source. */
+    uint32_t start;
 
-    /** A pointer to the end location of the range in the source. */
-    const uint8_t *end;
+    /** The length of the location. */
+    uint32_t length;
 } pm_location_t;
 
 struct pm_node;
@@ -104,29 +121,13 @@ static const pm_node_flags_t PM_NODE_FLAG_NEWLINE = 0x1;
 static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = 0x2;
 
 /**
- * Cast the type to an enum to allow the compiler to provide exhaustiveness
- * checking.
- */
-#define PM_NODE_TYPE(node) ((enum pm_node_type) (node)->type)
-
-/**
- * Return true if the type of the given node matches the given type.
- */
-#define PM_NODE_TYPE_P(node, type) (PM_NODE_TYPE(node) == (type))
-
-/**
- * Return true if the given flag is set on the given node.
- */
-#define PM_NODE_FLAG_P(node, flag) ((((pm_node_t *)(node))->flags & (flag)) != 0)
-
-/**
  * This is the base structure that represents a node in the syntax tree. It is
  * embedded into every node type.
  */
 typedef struct pm_node {
     /**
      * This represents the type of the node. It somewhat maps to the nodes that
-     * existed in the original grammar and ripper, but it's not a 1:1 mapping.
+     * existed in the original grammar and ripper, but it is not a 1:1 mapping.
      */
     pm_node_type_t type;
 
@@ -143,11 +144,46 @@ typedef struct pm_node {
     uint32_t node_id;
 
     /**
-     * This is the location of the node in the source. It's a range of bytes
+     * This is the location of the node in the source. It is a range of bytes
      * containing a start and an end.
      */
     pm_location_t location;
 } pm_node_t;
+
+/**
+ * Cast the given node to the base pm_node_t type.
+ */
+#define PM_NODE_UPCAST(node_) ((pm_node_t *) (node_))
+
+/**
+ * Cast the type to an enum to allow the compiler to provide exhaustiveness
+ * checking.
+ */
+#define PM_NODE_TYPE(node_) ((enum pm_node_type) (node_)->type)
+
+/**
+ * Return true if the type of the given node matches the given type.
+ */
+#define PM_NODE_TYPE_P(node_, type_) (PM_NODE_TYPE(node_) == (type_))
+
+/**
+ * Return the flags associated with the given node.
+ */
+#define PM_NODE_FLAGS(node_) (PM_NODE_UPCAST(node_)->flags)
+
+/**
+ * Return true if the given flag is set on the given node.
+ */
+#define PM_NODE_FLAG_P(node_, flag_) ((PM_NODE_FLAGS(node_) & (flag_)) != 0)
+
+/**
+ * The alignment required for a child node within a parent node.
+ */
+#ifdef _MSC_VER
+#define PM_NODE_ALIGNAS __declspec(align(8))
+#else
+#define PM_NODE_ALIGNAS PRISM_ALIGNAS(PRISM_ALIGNOF(void *))
+#endif
 <%- nodes.each do |node| -%>
 
 /**
@@ -170,7 +206,6 @@ typedef struct pm_node {
 typedef struct pm_<%= node.human %> {
     /** The embedded base node. */
     pm_node_t base;
-
 <%- node.fields.each do |field| -%>
 
     /**
@@ -183,7 +218,7 @@ typedef struct pm_<%= node.human %> {
     <%- end -%>
      */
     <%= case field
-    when Prism::Template::NodeField, Prism::Template::OptionalNodeField then "struct #{field.c_type} *#{field.name}"
+    when Prism::Template::NodeField, Prism::Template::OptionalNodeField then "PM_NODE_ALIGNAS struct #{field.c_type} *#{field.name}"
     when Prism::Template::NodeListField then "struct pm_node_list #{field.name}"
     when Prism::Template::ConstantField, Prism::Template::OptionalConstantField then "pm_constant_id_t #{field.name}"
     when Prism::Template::ConstantListField then "pm_constant_id_list_t #{field.name}"
@@ -210,8 +245,27 @@ typedef enum pm_<%= flag.human %> {
     /** <%= value.comment %> */
     PM_<%= flag.human.upcase %>_<%= value.name %> = <%= 1 << (index + Prism::Template::COMMON_FLAGS_COUNT) %>,
     <%- end -%>
+
+    PM_<%= flag.human.upcase %>_LAST,
 } pm_<%= flag.human %>_t;
 <%- end -%>
+<%- nodes.each do |node| -%>
+
+<%- params = node.fields.map(&:c_param) -%>
+/**
+ * Allocate and initialize a new <%= node.name %> node.
+ *
+ * @param arena The arena to allocate from.
+ * @param node_id The unique identifier for this node.
+ * @param flags The flags for this node.
+ * @param location The location of this node in the source.
+<%- node.fields.each do |field| -%>
+ * @param <%= field.name %> <%= field.comment ? Prism::Template::Doxygen.verbatim(field.comment.lines.first.strip) : "The #{field.name} field." %>
+<%- end -%>
+ * @returns The newly allocated and initialized node.
+ */
+PRISM_EXPORTED_FUNCTION pm_<%= node.human %>_t * pm_<%= node.human %>_new(pm_arena_t *arena, uint32_t node_id, pm_node_flags_t flags, pm_location_t location<%= params.empty? ? "" : ", #{params.join(", ")}" %>);
+<%- end -%>
 
 /**
  * When we're serializing to Java, we want to skip serializing the location
diff --git a/prism/templates/include/prism/diagnostic.h.erb b/prism/templates/include/prism/diagnostic.h.erb
deleted file mode 100644
index 07bbc8fae7..0000000000
--- a/prism/templates/include/prism/diagnostic.h.erb
+++ /dev/null
@@ -1,130 +0,0 @@
-/**
- * @file diagnostic.h
- *
- * A list of diagnostics generated during parsing.
- */
-#ifndef PRISM_DIAGNOSTIC_H
-#define PRISM_DIAGNOSTIC_H
-
-#include "prism/ast.h"
-#include "prism/defines.h"
-#include "prism/util/pm_list.h"
-
-#include <stdbool.h>
-#include <stdlib.h>
-#include <assert.h>
-
-/**
- * The diagnostic IDs of all of the diagnostics, used to communicate the types
- * of errors between the parser and the user.
- */
-typedef enum {
-    // These are the error diagnostics.
-    <%- errors.each do |error| -%>
-    PM_ERR_<%= error.name %>,
-    <%- end -%>
-
-    // These are the warning diagnostics.
-    <%- warnings.each do |warning| -%>
-    PM_WARN_<%= warning.name %>,
-    <%- end -%>
-} pm_diagnostic_id_t;
-
-/**
- * This struct represents a diagnostic generated during parsing.
- *
- * @extends pm_list_node_t
- */
-typedef struct {
-    /** The embedded base node. */
-    pm_list_node_t node;
-
-    /** The location of the diagnostic in the source. */
-    pm_location_t location;
-
-    /** The ID of the diagnostic. */
-    pm_diagnostic_id_t diag_id;
-
-    /** The message associated with the diagnostic. */
-    const char *message;
-
-    /**
-     * Whether or not the memory related to the message of this diagnostic is
-     * owned by this diagnostic. If it is, it needs to be freed when the
-     * diagnostic is freed.
-     */
-    bool owned;
-
-    /**
-     * The level of the diagnostic, see `pm_error_level_t` and
-     * `pm_warning_level_t` for possible values.
-     */
-    uint8_t level;
-} pm_diagnostic_t;
-
-/**
- * The levels of errors generated during parsing.
- */
-typedef enum {
-    /** For errors that should raise a syntax error. */
-    PM_ERROR_LEVEL_SYNTAX = 0,
-
-    /** For errors that should raise an argument error. */
-    PM_ERROR_LEVEL_ARGUMENT = 1,
-
-    /** For errors that should raise a load error. */
-    PM_ERROR_LEVEL_LOAD = 2
-} pm_error_level_t;
-
-/**
- * The levels of warnings generated during parsing.
- */
-typedef enum {
-    /** For warnings which should be emitted if $VERBOSE != nil. */
-    PM_WARNING_LEVEL_DEFAULT = 0,
-
-    /** For warnings which should be emitted if $VERBOSE == true. */
-    PM_WARNING_LEVEL_VERBOSE = 1
-} pm_warning_level_t;
-
-/**
- * Get the human-readable name of the given diagnostic ID.
- *
- * @param diag_id The diagnostic ID.
- * @return The human-readable name of the diagnostic ID.
- */
-const char * pm_diagnostic_id_human(pm_diagnostic_id_t diag_id);
-
-/**
- * Append a diagnostic to the given list of diagnostics that is using shared
- * memory for its message.
- *
- * @param list The list to append to.
- * @param start The start of the diagnostic.
- * @param end The end of the diagnostic.
- * @param diag_id The diagnostic ID.
- * @return Whether the diagnostic was successfully appended.
- */
-bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id);
-
-/**
- * Append a diagnostic to the given list of diagnostics that is using a format
- * string for its message.
- *
- * @param list The list to append to.
- * @param start The start of the diagnostic.
- * @param end The end of the diagnostic.
- * @param diag_id The diagnostic ID.
- * @param ... The arguments to the format string for the message.
- * @return Whether the diagnostic was successfully appended.
- */
-bool pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...);
-
-/**
- * Deallocate the internal state of the given diagnostic list.
- *
- * @param list The list to deallocate.
- */
-void pm_diagnostic_list_free(pm_list_t *list);
-
-#endif
diff --git a/prism/templates/include/prism/internal/diagnostic.h.erb b/prism/templates/include/prism/internal/diagnostic.h.erb
new file mode 100644
index 0000000000..ee44ff5382
--- /dev/null
+++ b/prism/templates/include/prism/internal/diagnostic.h.erb
@@ -0,0 +1,60 @@
+#ifndef PRISM_INTERNAL_DIAGNOSTIC_H
+#define PRISM_INTERNAL_DIAGNOSTIC_H
+
+#include "prism/internal/list.h"
+
+#include "prism/arena.h"
+#include "prism/diagnostic.h"
+
+/*
+ * The diagnostic IDs of all of the diagnostics, used to communicate the types
+ * of errors between the parser and the user.
+ */
+typedef enum {
+    /* These are the error diagnostics. */
+    <%- errors.each do |error| -%>
+    PM_ERR_<%= error.name %>,
+    <%- end -%>
+
+    /* These are the warning diagnostics. */
+    <%- warnings.each do |warning| -%>
+    PM_WARN_<%= warning.name %>,
+    <%- end -%>
+} pm_diagnostic_id_t;
+
+/*
+ * This struct represents a diagnostic generated during parsing.
+ */
+struct pm_diagnostic_t {
+    /* The embedded base node. */
+    pm_list_node_t node;
+
+    /* The location of the diagnostic in the source. */
+    pm_location_t location;
+
+    /* The ID of the diagnostic. */
+    pm_diagnostic_id_t diag_id;
+
+    /* The message associated with the diagnostic. */
+    const char *message;
+
+    /*
+     * The level of the diagnostic, see `pm_error_level_t` and
+     * `pm_warning_level_t` for possible values.
+     */
+    uint8_t level;
+};
+
+/*
+ * Append a diagnostic to the given list of diagnostics that is using shared
+ * memory for its message.
+ */
+void pm_diagnostic_list_append(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id);
+
+/*
+ * Append a diagnostic to the given list of diagnostics that is using a format
+ * string for its message.
+ */
+void pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...);
+
+#endif
diff --git a/prism/templates/lib/prism/compiler.rb.erb b/prism/templates/lib/prism/compiler.rb.erb
index 45ed88d8de..13317cac04 100644
--- a/prism/templates/lib/prism/compiler.rb.erb
+++ b/prism/templates/lib/prism/compiler.rb.erb
@@ -1,3 +1,6 @@
+#--
+# rbs_inline: enabled
+
 module Prism
   # A compiler is a visitor that returns the value of each node as it visits.
   # This is as opposed to a visitor which will only walk the tree. This can be
@@ -18,24 +21,32 @@ module Prism
   #
   class Compiler < Visitor
     # Visit an individual node.
-    def visit(node)
+    #--
+    #: (node?) -> untyped
+    def visit(node) # :nodoc:
       node&.accept(self)
     end
 
     # Visit a list of nodes.
-    def visit_all(nodes)
+    #--
+    #: (Array[node?]) -> untyped
+    def visit_all(nodes) # :nodoc:
       nodes.map { |node| node&.accept(self) }
     end
 
     # Visit the child nodes of the given node.
-    def visit_child_nodes(node)
-      node.compact_child_nodes.map { |node| node.accept(self) }
+    #--
+    #: (node) -> Array[untyped]
+    def visit_child_nodes(node) # :nodoc:
+      node.each_child_node.map { |node| node.accept(self) }
     end
 
     <%- nodes.each_with_index do |node, index| -%>
 <%= "\n" if index != 0 -%>
-    # Compile a <%= node.name %> node
-    alias visit_<%= node.human %> visit_child_nodes
+    #: (<%= node.name %>) -> Array[untyped]
+    def visit_<%= node.human %>(node) # :nodoc:
+      node.each_child_node.map { |node| node.accept(self) }
+    end
     <%- end -%>
   end
 end
diff --git a/prism/templates/lib/prism/dispatcher.rb.erb b/prism/templates/lib/prism/dispatcher.rb.erb
index 52478451c9..5991b0c904 100644
--- a/prism/templates/lib/prism/dispatcher.rb.erb
+++ b/prism/templates/lib/prism/dispatcher.rb.erb
@@ -1,3 +1,6 @@
+#--
+# rbs_inline: enabled
+
 module Prism
   # The dispatcher class fires events for nodes that are found while walking an
   # AST to all registered listeners. It's useful for performing different types
@@ -32,50 +35,52 @@ module Prism
   #     dispatcher.dispatch_once(integer)
   #
   class Dispatcher < Visitor
-    # attr_reader listeners: Hash[Symbol, Array[Listener]]
-    attr_reader :listeners
+    # A hash mapping event names to arrays of listeners that should be notified
+    # when that event is fired.
+    attr_reader :listeners #: Hash[Symbol, Array[untyped]]
 
     # Initialize a new dispatcher.
+    #--
+    #: () -> void
     def initialize
       @listeners = {}
     end
 
     # Register a listener for one or more events.
-    #
-    # def register: (Listener, *Symbol) -> void
+    #--
+    #: (untyped, *Symbol) -> void
     def register(listener, *events)
       register_events(listener, events)
     end
 
     # Register all public methods of a listener that match the pattern
     # `on_<node_name>_(enter|leave)`.
-    #
-    # def register_public_methods: (Listener) -> void
+    #--
+    #: (untyped) -> void
     def register_public_methods(listener)
       register_events(listener, listener.public_methods(false).grep(/\Aon_.+_(?:enter|leave)\z/))
     end
 
     # Register a listener for the given events.
-    private def register_events(listener, events)
+    #--
+    #: (untyped, Array[Symbol]) -> void
+    private def register_events(listener, events) # :nodoc:
       events.each { |event| (listeners[event] ||= []) << listener }
     end
 
     # Walks `root` dispatching events to all registered listeners.
-    #
-    # def dispatch: (Node) -> void
     alias dispatch visit
 
     # Dispatches a single event for `node` to all registered listeners.
-    #
-    # def dispatch_once: (Node) -> void
+    #--
+    #: (node node) -> void
     def dispatch_once(node)
       node.accept(DispatchOnce.new(listeners))
     end
     <%- nodes.each do |node| -%>
 
-    # Dispatch enter and leave events for <%= node.name %> nodes and continue
-    # walking the tree.
-    def visit_<%= node.human %>(node)
+    #: (<%= node.name %> node) -> void
+    def visit_<%= node.human %>(node) # :nodoc:
       listeners[:on_<%= node.human %>_enter]&.each { |listener| listener.on_<%= node.human %>_enter(node) }
       super
       listeners[:on_<%= node.human %>_leave]&.each { |listener| listener.on_<%= node.human %>_leave(node) }
@@ -83,14 +88,17 @@ module Prism
     <%- end -%>
 
     class DispatchOnce < Visitor # :nodoc:
-      attr_reader :listeners
+      attr_reader :listeners #: Hash[Symbol, Array[untyped]]
 
+      #: (Hash[Symbol, Array[untyped]] listeners) -> void
       def initialize(listeners)
         @listeners = listeners
       end
       <%- nodes.each do |node| -%>
 
       # Dispatch enter and leave events for <%= node.name %> nodes.
+      #--
+      #: (<%= node.name %> node) -> void
       def visit_<%= node.human %>(node)
         listeners[:on_<%= node.human %>_enter]&.each { |listener| listener.on_<%= node.human %>_enter(node) }
         listeners[:on_<%= node.human %>_leave]&.each { |listener| listener.on_<%= node.human %>_leave(node) }
diff --git a/prism/templates/lib/prism/dot_visitor.rb.erb b/prism/templates/lib/prism/dot_visitor.rb.erb
index e9c81e4545..88ef1e1f36 100644
--- a/prism/templates/lib/prism/dot_visitor.rb.erb
+++ b/prism/templates/lib/prism/dot_visitor.rb.erb
@@ -1,18 +1,26 @@
-require "cgi"
+#--
+# rbs_inline: enabled
+
+require "cgi/escape"
+require "cgi/util" unless defined?(CGI::EscapeExt)
 
 module Prism
   # This visitor provides the ability to call Node#to_dot, which converts a
   # subtree into a graphviz dot graph.
   class DotVisitor < Visitor
     class Field # :nodoc:
-      attr_reader :name, :value, :port
+      attr_reader :name #: String
+      attr_reader :value #: String?
+      attr_reader :port #: bool
 
+      #: (String name, String? value, bool port) -> void
       def initialize(name, value, port)
         @name = name
         @value = value
         @port = port
       end
 
+      #: () -> String
       def to_dot
         if port
           "<tr><td align=\"left\" colspan=\"2\" port=\"#{name}\">#{name}</td></tr>"
@@ -23,17 +31,21 @@ module Prism
     end
 
     class Table # :nodoc:
-      attr_reader :name, :fields
+      attr_reader :name #: String
+      attr_reader :fields #: Array[Field]
 
+      #: (String name) -> void
       def initialize(name)
         @name = name
         @fields = []
       end
 
+      #: (String name, ?String? value, ?port: bool) -> void
       def field(name, value = nil, port: false)
         fields << Field.new(name, value, port)
       end
 
+      #: () -> String
       def to_dot
         dot = <<~DOT
           <table border="0" cellborder="1" cellspacing="0" cellpadding="4">
@@ -49,26 +61,31 @@ module Prism
     end
 
     class Digraph # :nodoc:
-      attr_reader :nodes, :waypoints, :edges
+      attr_reader :nodes, :waypoints, :edges #: Array[String]
 
+      #: () -> void
       def initialize
         @nodes = []
         @waypoints = []
         @edges = []
       end
 
+      #: (String value) -> void
       def node(value)
         nodes << value
       end
 
+      #: (String value) -> void
       def waypoint(value)
         waypoints << value
       end
 
+      #: (String value) -> void
       def edge(value)
         edges << value
       end
 
+      #: () -> String
       def to_dot
         <<~DOT
           digraph "Prism" {
@@ -92,21 +109,25 @@ module Prism
     private_constant :Field, :Table, :Digraph
 
     # The digraph that is being built.
-    attr_reader :digraph
+    attr_reader :digraph #: Digraph
 
     # Initialize a new dot visitor.
+    #--
+    #: () -> void
     def initialize
       @digraph = Digraph.new
     end
 
     # Convert this visitor into a graphviz dot graph string.
+    #--
+    #: () -> String
     def to_dot
       digraph.to_dot
     end
     <%- nodes.each do |node| -%>
 
-    # Visit a <%= node.name %> node.
-    def visit_<%= node.human %>(node)
+    #: (<%= node.name %>) -> void
+    def visit_<%= node.human %>(node) # :nodoc:
       table = Table.new("<%= node.name %>")
       id = node_id(node)
       <%- if (node_flags = node.flags) -%>
@@ -151,7 +172,7 @@ module Prism
       <%- end -%>
       <%- end -%>
 
-      digraph.nodes << <<~DOT
+      digraph.node(<<~DOT)
         #{id} [
           label=<#{table.to_dot.gsub(/\n/, "\n  ")}>
         ];
@@ -164,19 +185,25 @@ module Prism
     private
 
     # Generate a unique node ID for a node throughout the digraph.
-    def node_id(node)
+    #--
+    #: (node) -> String
+    def node_id(node) # :nodoc:
       "Node_#{node.object_id}"
     end
 
-    # Inspect a location to display the start and end line and column numbers.
-    def location_inspect(location)
+    # Inspect a location to display the start and end line and columns in bytes.
+    #--
+    #: (Location) -> String
+    def location_inspect(location) # :nodoc:
       "(#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column})"
     end
     <%- flags.each do |flag| -%>
 
     # Inspect a node that has <%= flag.human %> flags to display the flags as a
     # comma-separated list.
-    def <%= flag.human %>_inspect(node)
+    #--
+    #: (<%= nodes.filter_map { |node| node.name if node.flags == flag }.join(" | ") %> node) -> String
+    def <%= flag.human %>_inspect(node) # :nodoc:
       flags = [] #: Array[String]
       <%- flag.values.each do |value| -%>
       flags << "<%= value.name.downcase %>" if node.<%= value.name.downcase %>?
diff --git a/prism/templates/lib/prism/dsl.rb.erb b/prism/templates/lib/prism/dsl.rb.erb
index e16ebb7110..be7dc6d9c1 100644
--- a/prism/templates/lib/prism/dsl.rb.erb
+++ b/prism/templates/lib/prism/dsl.rb.erb
@@ -1,8 +1,11 @@
+#--
+# rbs_inline: enabled
+
 module Prism
   # The DSL module provides a set of methods that can be used to create prism
   # nodes in a more concise manner. For example, instead of writing:
   #
-  #     source = Prism::Source.for("[1]")
+  #     source = Prism::Source.for("[1]", 1, [0])
   #
   #     Prism::ArrayNode.new(
   #       source,
@@ -56,17 +59,31 @@ module Prism
     extend self
 
     # Create a new Source object.
+    #--
+    #: (String string) -> Source
     def source(string)
-      Source.for(string)
+      Source.for(string, 1, build_offsets(string))
     end
 
     # Create a new Location object.
+    #--
+    #: (?source: Source, ?start_offset: Integer, ?length: Integer) -> Location
     def location(source: default_source, start_offset: 0, length: 0)
       Location.new(source, start_offset, length)
     end
     <%- nodes.each do |node| -%>
+    <%-
+    params = [
+      ["source", "Source"],
+      ["node_id", "Integer"],
+      ["location", "Location"],
+      ["flags", "Integer"]
+    ].concat(node.fields.map { |field| [field.name, field.rbs_class] })
+    -%>
 
     # Create a new <%= node.name %> node.
+    #--
+    #: (<%= params.map { |(name, type)| "?#{name}: #{type}" }.join(", ") %>) -> <%= node.name %>
     def <%= node.human %>(<%= ["source: default_source", "node_id: 0", "location: default_location", "flags: 0", *node.fields.map { |field|
       case field
       when Prism::Template::NodeField
@@ -100,6 +117,8 @@ module Prism
     <%- flags.each do |flag| -%>
 
     # Retrieve the value of one of the <%= flag.name %> flags.
+    #--
+    #: (Symbol name) -> Integer
     def <%= flag.human.chomp("s") %>(name)
       case name
       <%- flag.values.each do |value| -%>
@@ -114,20 +133,40 @@ module Prism
 
     # The default source object that gets attached to nodes and locations if no
     # source is specified.
+    #--
+    #: () -> Source
     def default_source
-      Source.for("")
+      Source.for("", 1, [0])
     end
 
     # The default location object that gets attached to nodes if no location is
     # specified, which uses the given source.
+    #--
+    #: () -> Location
     def default_location
       Location.new(default_source, 0, 0)
     end
 
     # The default node that gets attached to nodes if no node is specified for a
     # required node field.
+    #--
+    #: (Source source, Location location) -> node
     def default_node(source, location)
-      MissingNode.new(source, -1, location, 0)
+      ErrorRecoveryNode.new(source, -1, location, 0, nil)
+    end
+
+    private
+
+    # Build the newline byte offset array for the given source string.
+    #--
+    #: (String source) -> Array[Integer]
+    def build_offsets(source)
+      offsets = [0]
+      start = 0
+      while (index = source.byteindex("\n", start))
+        offsets << (start = index + 1)
+      end
+      offsets
     end
   end
 end
diff --git a/prism/templates/lib/prism/inspect_visitor.rb.erb b/prism/templates/lib/prism/inspect_visitor.rb.erb
index 3cfe615d85..820f5ae75f 100644
--- a/prism/templates/lib/prism/inspect_visitor.rb.erb
+++ b/prism/templates/lib/prism/inspect_visitor.rb.erb
@@ -1,3 +1,6 @@
+#--
+# rbs_inline: enabled
+
 module Prism
   # This visitor is responsible for composing the strings that get returned by
   # the various #inspect methods defined on each of the nodes.
@@ -7,8 +10,9 @@ module Prism
     # when we hit an element in that list. In this case, we have a special
     # command that replaces the subsequent indent with the given value.
     class Replace # :nodoc:
-      attr_reader :value
+      attr_reader :value #: String
 
+      #: (String value) -> void
       def initialize(value)
         @value = value
       end
@@ -17,19 +21,25 @@ module Prism
     private_constant :Replace
 
     # The current prefix string.
-    attr_reader :indent
+    # :stopdoc:
+    attr_reader :indent #: String
+    # :startdoc:
 
     # The list of commands that we need to execute in order to compose the
     # final string.
-    attr_reader :commands
+    # :stopdoc:
+    attr_reader :commands #: Array[[String | node | Replace, String]]
+    # :startdoc:
 
-    # Initializes a new instance of the InspectVisitor.
-    def initialize(indent = +"")
+    #: (?String indent) -> void
+    def initialize(indent = +"") # :nodoc:
       @indent = indent
       @commands = []
     end
 
     # Compose an inspect string for the given node.
+    #--
+    #: (node node) -> String
     def self.compose(node)
       visitor = new
       node.accept(visitor)
@@ -37,7 +47,9 @@ module Prism
     end
 
     # Compose the final string.
-    def compose
+    #--
+    #: () -> String
+    def compose # :nodoc:
       buffer = +""
       replace = nil
 
@@ -66,8 +78,8 @@ module Prism
     end
     <%- nodes.each do |node| -%>
 
-    # Inspect a <%= node.name %> node.
-    def visit_<%= node.human %>(node)
+    #: (<%= node.name %> node) -> void
+    def visit_<%= node.human %>(node) # :nodoc:
       commands << [inspect_node(<%= node.name.inspect %>, node), indent]
       <%- (fields = [node.flags || Prism::Template::Flags.empty, *node.fields]).each_with_index do |field, index| -%>
       <%- pointer = index == fields.length - 1 ? "└── " : "├── " -%>
@@ -114,13 +126,17 @@ module Prism
     private
 
     # Compose a header for the given node.
-    def inspect_node(name, node)
+    #--
+    #: (String name, node node) -> String
+    def inspect_node(name, node) # :nodoc:
       location = node.location
       "@ #{name} (location: (#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column}))\n"
     end
 
     # Compose a string representing the given inner location field.
-    def inspect_location(location)
+    #--
+    #: (Location? location) -> String
+    def inspect_location(location) # :nodoc:
       if location
         "(#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column}) = #{location.slice.inspect}"
       else
diff --git a/prism/templates/lib/prism/mutation_compiler.rb.erb b/prism/templates/lib/prism/mutation_compiler.rb.erb
index 565ee4e315..2d555048d2 100644
--- a/prism/templates/lib/prism/mutation_compiler.rb.erb
+++ b/prism/templates/lib/prism/mutation_compiler.rb.erb
@@ -1,3 +1,6 @@
+#--
+# rbs_inline: enabled
+
 module Prism
   # This visitor walks through the tree and copies each node as it is being
   # visited. This is useful for consumers that want to mutate the tree, as you
@@ -5,8 +8,8 @@ module Prism
   class MutationCompiler < Compiler
     <%- nodes.each_with_index do |node, index| -%>
 <%= "\n" if index != 0 -%>
-    # Copy a <%= node.name %> node
-    def visit_<%= node.human %>(node)
+    #: (<%= node.name %>) -> node?
+    def visit_<%= node.human %>(node) # :nodoc:
       <%- fields = node.fields.select { |field| [Prism::Template::NodeField, Prism::Template::OptionalNodeField, Prism::Template::NodeListField].include?(field.class) } -%>
       <%- if fields.any? -%>
       node.copy(<%= fields.map { |field| "#{field.name}: #{field.is_a?(Prism::Template::NodeListField) ? "visit_all" : "visit"}(node.#{field.name})" }.join(", ") %>)
diff --git a/prism/templates/lib/prism/node.rb.erb b/prism/templates/lib/prism/node.rb.erb
index ceee2b0ffe..fb13051aba 100644
--- a/prism/templates/lib/prism/node.rb.erb
+++ b/prism/templates/lib/prism/node.rb.erb
@@ -1,24 +1,49 @@
+#--
+# rbs_inline: enabled
+
 module Prism
+  # @rbs!
+  #    interface _Repository
+  #      def enter: (Integer node_id, Symbol field_name) -> Relocation::Entry
+  #    end
+  #
+  #    interface _Node
+  #      def deconstruct: () -> Array[Prism::node?]
+  #      def inspect: () -> String
+  #    end
+  #
+  #    type node = Node & _Node
+
   # This represents a node in the tree. It is the parent class of all of the
   # various node types.
   class Node
     # A pointer to the source that this node was created from.
-    attr_reader :source
+    # :stopdoc:
+    attr_reader :source #: Source
     private :source
+    # :startdoc:
 
     # A unique identifier for this node. This is used in a very specific
     # use case where you want to keep around a reference to a node without
     # having to keep around the syntax tree in memory. This unique identifier
     # will be consistent across multiple parses of the same source code.
-    attr_reader :node_id
+    attr_reader :node_id #: Integer
+
+    # The location associated with this node. For lazily loading Location
+    # objects, we keep it as a packed integer until it is accessed.
+    # @rbs @location: Location | Integer
 
     # Save this node using a saved source so that it can be retrieved later.
+    #--
+    #: (_Repository repository) -> Relocation::Entry
     def save(repository)
       repository.enter(node_id, :itself)
     end
 
     # A Location instance that represents the location of this node in the
     # source.
+    #--
+    #: () -> Location
     def location
       location = @location
       return location if location.is_a?(Location)
@@ -26,104 +51,151 @@ module Prism
     end
 
     # Save the location using a saved source so that it can be retrieved later.
+    #--
+    #: (_Repository repository) -> Relocation::Entry
     def save_location(repository)
       repository.enter(node_id, :location)
     end
 
-    # Delegates to the start_line of the associated location object.
+    # --------------------------------------------------------------------------
+    # :section: Location Delegators
+    # These methods provide convenient access to the underlying Location object.
+    # --------------------------------------------------------------------------
+
+    # Delegates to [`start_line`](rdoc-ref:Location#start_line) of the associated location object.
+    #--
+    #: () -> Integer
     def start_line
       location.start_line
     end
 
-    # Delegates to the end_line of the associated location object.
+    # Delegates to [`end_line`](rdoc-ref:Location#end_line) of the associated location object.
+    #--
+    #: () -> Integer
     def end_line
       location.end_line
     end
 
-    # The start offset of the node in the source. This method is effectively a
-    # delegate method to the location object.
+    # Delegates to [`start_offset`](rdoc-ref:Location#start_offset) of the associated location object.
+    #--
+    #: () -> Integer
     def start_offset
       location = @location
       location.is_a?(Location) ? location.start_offset : location >> 32
     end
 
-    # The end offset of the node in the source. This method is effectively a
-    # delegate method to the location object.
+    # Delegates to [`end_offset`](rdoc-ref:Location#end_offset) of the associated location object.
+    #--
+    #: () -> Integer
     def end_offset
       location = @location
       location.is_a?(Location) ? location.end_offset : ((location >> 32) + (location & 0xFFFFFFFF))
     end
 
-    # Delegates to the start_character_offset of the associated location object.
+    # Delegates to [`start_character_offset`](rdoc-ref:Location#start_character_offset)
+    # of the associated location object.
+    #--
+    #: () -> Integer
     def start_character_offset
       location.start_character_offset
     end
 
-    # Delegates to the end_character_offset of the associated location object.
+    # Delegates to [`end_character_offset`](rdoc-ref:Location#end_character_offset)
+    # of the associated location object.
+    #--
+    #: () -> Integer
     def end_character_offset
       location.end_character_offset
     end
 
-    # Delegates to the cached_start_code_units_offset of the associated location
-    # object.
+    # Delegates to [`cached_start_code_units_offset`](rdoc-ref:Location#cached_start_code_units_offset)
+    # of the associated location object.
+    #--
+    #: (_CodeUnitsCache cache) -> Integer
     def cached_start_code_units_offset(cache)
       location.cached_start_code_units_offset(cache)
     end
 
-    # Delegates to the cached_end_code_units_offset of the associated location
-    # object.
+    # Delegates to [`cached_end_code_units_offset`](rdoc-ref:Location#cached_end_code_units_offset)
+    # of the associated location object.
+    #--
+    #: (_CodeUnitsCache cache) -> Integer
     def cached_end_code_units_offset(cache)
       location.cached_end_code_units_offset(cache)
     end
 
-    # Delegates to the start_column of the associated location object.
+    # Delegates to [`start_column`](rdoc-ref:Location#start_column) of the associated location object.
+    #--
+    #: () -> Integer
     def start_column
       location.start_column
     end
 
-    # Delegates to the end_column of the associated location object.
+    # Delegates to [`end_column`](rdoc-ref:Location#end_column) of the associated location object.
+    #--
+    #: () -> Integer
     def end_column
       location.end_column
     end
 
-    # Delegates to the start_character_column of the associated location object.
+    # Delegates to [`start_character_column`](rdoc-ref:Location#start_character_column)
+    # of the associated location object.
+    #--
+    #: () -> Integer
     def start_character_column
       location.start_character_column
     end
 
-    # Delegates to the end_character_column of the associated location object.
+    # Delegates to [`end_character_column`](rdoc-ref:Location#end_character_column)
+    # of the associated location object.
+    #--
+    #: () -> Integer
     def end_character_column
       location.end_character_column
     end
 
-    # Delegates to the cached_start_code_units_column of the associated location
-    # object.
+    # Delegates to [`cached_start_code_units_column`](rdoc-ref:Location#cached_start_code_units_column)
+    # of the associated location object.
+    #--
+    #: (_CodeUnitsCache cache) -> Integer
     def cached_start_code_units_column(cache)
       location.cached_start_code_units_column(cache)
     end
 
-    # Delegates to the cached_end_code_units_column of the associated location
-    # object.
+    # Delegates to [`cached_end_code_units_column`](rdoc-ref:Location#cached_end_code_units_column)
+    # of the associated location object.
+    #--
+    #: (_CodeUnitsCache cache) -> Integer
     def cached_end_code_units_column(cache)
       location.cached_end_code_units_column(cache)
     end
 
-    # Delegates to the leading_comments of the associated location object.
+    # Delegates to [`leading_comments`](rdoc-ref:Location#leading_comments) of the associated location object.
+    #--
+    #: () -> Array[Comment]
     def leading_comments
       location.leading_comments
     end
 
-    # Delegates to the trailing_comments of the associated location object.
+    # Delegates to [`trailing_comments`](rdoc-ref:Location#trailing_comments) of the associated location object.
+    #--
+    #: () -> Array[Comment]
     def trailing_comments
       location.trailing_comments
     end
 
-    # Delegates to the comments of the associated location object.
+    # Delegates to [`comments`](rdoc-ref:Location#comments) of the associated location object.
+    #--
+    #: () -> Array[Comment]
     def comments
       location.comments
     end
 
+    # :section:
+
     # Returns all of the lines of the source code associated with this node.
+    #--
+    #: () -> Array[String]
     def source_lines
       location.source_lines
     end
@@ -133,6 +205,8 @@ module Prism
     alias script_lines source_lines
 
     # Slice the location of the node from the source.
+    #--
+    #: () -> String
     def slice
       location.slice
     end
@@ -140,28 +214,38 @@ module Prism
     # Slice the location of the node from the source, starting at the beginning
     # of the line that the location starts on, ending at the end of the line
     # that the location ends on.
+    #--
+    #: () -> String
     def slice_lines
       location.slice_lines
     end
 
     # An bitset of flags for this node. There are certain flags that are common
     # for all nodes, and then some nodes have specific flags.
-    attr_reader :flags
+    # :stopdoc:
+    attr_reader :flags #: Integer
     protected :flags
+    # :startdoc:
 
     # Returns true if the node has the newline flag set.
+    #--
+    #: () -> bool
     def newline?
       flags.anybits?(NodeFlags::NEWLINE)
     end
 
     # Returns true if the node has the static literal flag set.
+    #--
+    #: () -> bool
     def static_literal?
       flags.anybits?(NodeFlags::STATIC_LITERAL)
     end
 
     # Similar to inspect, but respects the current level of indentation given by
     # the pretty print object.
-    def pretty_print(q)
+    #--
+    #: (PP q) -> void
+    def pretty_print(q) # :nodoc:
       q.seplist(inspect.chomp.each_line, -> { q.breakable }) do |line|
         q.text(line.chomp)
       end
@@ -169,6 +253,8 @@ module Prism
     end
 
     # Convert this node into a graphviz dot graph string.
+    #--
+    #: () -> String
     def to_dot
       # @type self: node
       DotVisitor.new.tap { |visitor| accept(visitor) }.to_dot
@@ -180,28 +266,18 @@ module Prism
     #
     # Important to note is that the column given to this method should be in
     # bytes, as opposed to characters or code units.
+    #--
+    #: (Integer line, Integer column) -> Array[node]
     def tunnel(line, column)
-      queue = [self] #: Array[Prism::node]
-      result = [] #: Array[Prism::node]
+      queue = [self] #: Array[node]
+      result = [] #: Array[node]
+      offset = source.byte_offset(line, column)
 
       while (node = queue.shift)
         result << node
 
-        node.compact_child_nodes.each do |child_node|
-          child_location = child_node.location
-
-          start_line = child_location.start_line
-          end_line = child_location.end_line
-
-          if start_line == end_line
-            if line == start_line && column >= child_location.start_column && column < child_location.end_column
-              queue << child_node
-              break
-            end
-          elsif (line == start_line && column >= child_location.start_column) || (line == end_line && column < child_location.end_column)
-            queue << child_node
-            break
-          elsif line > start_line && line < end_line
+        node.each_child_node do |child_node|
+          if child_node.start_offset <= offset && offset < child_node.end_offset
             queue << child_node
             break
           end
@@ -212,13 +288,14 @@ module Prism
     end
 
     # Returns the first node that matches the given block when visited in a
-    # depth-first search. This is useful for finding a node that matches a
+    # breadth-first search. This is useful for finding a node that matches a
     # particular condition.
     #
     #     node.breadth_first_search { |node| node.node_id == node_id }
-    #
-    def breadth_first_search(&block)
-      queue = [self] #: Array[Prism::node]
+    #--
+    #: () { (node) -> bool } -> node?
+    def breadth_first_search(&blk)
+      queue = [self] #: Array[node]
 
       while (node = queue.shift)
         return node if yield node
@@ -227,10 +304,33 @@ module Prism
 
       nil
     end
+    alias find breadth_first_search
+
+    # Returns all of the nodes that match the given block when visited in a
+    # breadth-first search. This is useful for finding all nodes that match a
+    # particular condition.
+    #
+    #     node.breadth_first_search_all { |node| node.is_a?(Prism::CallNode) }
+    #--
+    #: () { (node) -> bool } -> Array[node]
+    def breadth_first_search_all(&blk)
+      queue = [self] #: Array[Prism::node]
+      results = [] #: Array[Prism::node]
+
+      while (node = queue.shift)
+        results << node if yield node
+        queue.concat(node.compact_child_nodes)
+      end
+
+      results
+    end
+    alias find_all breadth_first_search_all
 
     # Returns a list of the fields that exist for this node class. Fields
     # describe the structure of the node. This kind of reflection is useful for
     # things like recursively visiting each node _and_ field in the tree.
+    #--
+    #: () -> Array[Reflection::Field]
     def self.fields
       # This method should only be called on subclasses of Node, not Node
       # itself.
@@ -240,38 +340,57 @@ module Prism
     end
 
     # --------------------------------------------------------------------------
-    # :section: Node interface
-    # These methods are effectively abstract methods that must be implemented by
-    # the various subclasses of Node. They are here to make it easier to work
-    # with typecheckers.
+    # :section: Node Interface
+    # These methods are effectively abstract methods that are implemented by
+    # the various subclasses of Node.
     # --------------------------------------------------------------------------
 
     # Accepts a visitor and calls back into the specialized visit function.
+    #--
+    #: (_Visitor visitor) -> untyped
     def accept(visitor)
       raise NoMethodError, "undefined method `accept' for #{inspect}"
     end
 
     # Returns an array of child nodes, including `nil`s in the place of optional
     # nodes that were not present.
+    #--
+    #: () -> Array[node?]
     def child_nodes
       raise NoMethodError, "undefined method `child_nodes' for #{inspect}"
     end
 
     alias deconstruct child_nodes
 
+    # With a block given, yields each child node. Without a block, returns
+    # an enumerator that contains each child node. Excludes any `nil`s in
+    # the place of optional nodes that were not present.
+    #--
+    #: () -> Enumerator[node, void]
+    #: () { (node) -> void } -> void
+    def each_child_node(&blk)
+      raise NoMethodError, "undefined method `each_child_node' for #{inspect}"
+    end
+
     # Returns an array of child nodes, excluding any `nil`s in the place of
     # optional nodes that were not present.
+    #--
+    #: () -> Array[node]
     def compact_child_nodes
       raise NoMethodError, "undefined method `compact_child_nodes' for #{inspect}"
     end
 
     # Returns an array of child nodes and locations that could potentially have
     # comments attached to them.
+    #--
+    #: () -> Array[node | Location]
     def comment_targets
       raise NoMethodError, "undefined method `comment_targets' for #{inspect}"
     end
 
     # Returns a string representation of the node.
+    #--
+    #: () -> String
     def inspect
       raise NoMethodError, "undefined method `inspect' for #{inspect}"
     end
@@ -288,6 +407,8 @@ module Prism
     # it uses a single integer comparison, but also because if you're on CRuby
     # you can take advantage of the fact that case statements with all symbol
     # keys will use a jump table.
+    #--
+    #: () -> Symbol
     def type
       raise NoMethodError, "undefined method `type' for #{inspect}"
     end
@@ -296,6 +417,8 @@ module Prism
     # splitting on the type of the node without having to do a long === chain.
     # Note that like #type, it will still be slower than using == for a single
     # class, but should be faster in a case statement or an array comparison.
+    #--
+    #: () -> Symbol
     def self.type
       raise NoMethodError, "undefined method `type' for #{inspect}"
     end
@@ -306,7 +429,13 @@ module Prism
   #<%= line %>
   <%- end -%>
   class <%= node.name -%> < Node
+    <%- node.fields.each do |field| -%>
+    # @rbs @<%= field.name %>: <%= field.rbs_class %>
+    <%- end -%>
+
     # Initialize a new <%= node.name %> node.
+    #--
+    #: (<%= ["Source source", "Integer node_id", "Location location", "Integer flags", *node.fields.map { |field| "#{field.rbs_class} #{field.name}" }].join(", ") %>) -> void
     def initialize(<%= ["source", "node_id", "location", "flags", *node.fields.map(&:name)].join(", ") %>)
       @source = source
       @node_id = node_id
@@ -320,12 +449,27 @@ module Prism
       <%- end -%>
     end
 
-    # def accept: (Visitor visitor) -> void
+    # ---------
+    # :section: Repository
+    # Methods related to Relocation.
+    # ---------
+
+    # ----------------------------------------------------------------------------------
+    # :section: Node Interface
+    # These methods are present on all subclasses of Node.
+    # Read the [node interface docs](Node.html#node-interface) for more information.
+    # ----------------------------------------------------------------------------------
+
+    # See Node.accept.
+    #--
+    #: (_Visitor visitor) -> untyped
     def accept(visitor)
       visitor.visit_<%= node.human %>(self)
     end
 
-    # def child_nodes: () -> Array[Node?]
+    # See Node.child_nodes.
+    #--
+    #: () -> Array[node?]
     def child_nodes
       [<%= node.fields.map { |field|
         case field
@@ -335,7 +479,28 @@ module Prism
       }.compact.join(", ") %>]
     end
 
-    # def compact_child_nodes: () -> Array[Node]
+    # See Node.each_child_node.
+    #--
+    #: () -> Enumerator[node, void]
+    #: () { (node) -> void } -> void
+    def each_child_node(&blk)
+      return to_enum(:each_child_node) unless block_given?
+
+      <%- node.fields.each do |field| -%>
+      <%- case field -%>
+      <%- when Prism::Template::NodeField -%>
+      yield <%= field.name %>
+      <%- when Prism::Template::OptionalNodeField -%>
+      if (<%= field.name %> = self.<%= field.name %>); yield <%= field.name %>; end
+      <%- when Prism::Template::NodeListField -%>
+      <%= field.name %>.each { |node| yield node }
+      <%- end -%>
+      <%- end -%>
+    end
+
+    # See Node.compact_child_nodes.
+    #--
+    #: () -> Array[node]
     def compact_child_nodes
       <%- if node.fields.any? { |field| field.is_a?(Prism::Template::OptionalNodeField) } -%>
       compact = [] #: Array[Prism::node]
@@ -344,7 +509,7 @@ module Prism
       <%- when Prism::Template::NodeField -%>
       compact << <%= field.name %>
       <%- when Prism::Template::OptionalNodeField -%>
-      compact << <%= field.name %> if <%= field.name %>
+      if (<%= field.name %> = self.<%= field.name %>); compact << <%= field.name %>; end
       <%- when Prism::Template::NodeListField -%>
       compact.concat(<%= field.name %>)
       <%- end -%>
@@ -360,7 +525,9 @@ module Prism
       <%- end -%>
     end
 
-    # def comment_targets: () -> Array[Node | Location]
+    # See Node.comment_targets.
+    #--
+    #: () -> Array[node | Location]
     def comment_targets
       [<%= node.fields.map { |field|
         case field
@@ -370,50 +537,101 @@ module Prism
       }.compact.join(", ") %>] #: Array[Prism::node | Location]
     end
 
-    # def copy: (<%= (["?node_id: Integer", "?location: Location", "?flags: Integer"] + node.fields.map { |field| "?#{field.name}: #{field.rbs_class}" }).join(", ") %>) -> <%= node.name %>
+    # :call-seq:
+    #   copy(**fields) -> <%= node.name %>
+    #
+    # Creates a copy of self with the given fields, using self as the template.
+    #--
+    #: (<%= ["?node_id: Integer", "?location: Location", "?flags: Integer", *node.fields.map { |field| "?#{field.name}: #{field.rbs_class}" }].join(", ") %>) -> <%= node.name %>
     def copy(<%= (["node_id", "location", "flags"] + node.fields.map(&:name)).map { |field| "#{field}: self.#{field}" }.join(", ") %>)
       <%= node.name %>.new(<%= ["source", "node_id", "location", "flags", *node.fields.map(&:name)].join(", ") %>)
     end
 
-    # def deconstruct: () -> Array[Node?]
     alias deconstruct child_nodes
 
-    # def deconstruct_keys: (Array[Symbol] keys) -> { <%= (["node_id: Integer", "location: Location"] + node.fields.map { |field| "#{field.name}: #{field.rbs_class}" }).join(", ") %> }
-    def deconstruct_keys(keys)
+    #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+    def deconstruct_keys(keys) # :nodoc:
       { <%= (["node_id: node_id", "location: location"] + node.fields.map { |field| "#{field.name}: #{field.name}" }).join(", ") %> }
     end
+
+    # See `Node#type`.
+    #--
+    #: () -> :<%= node.human %>
+    def type
+      :<%= node.human %>
+    end
+
+    # See `Node.type`.
+    #--
+    #: () -> :<%= node.human %>
+    def self.type
+      :<%= node.human %>
+    end
+
+    #: () -> String
+    def inspect # :nodoc:
+      InspectVisitor.compose(self)
+    end
+
+    # :section:
+
     <%- if (node_flags = node.flags) -%>
     <%- node_flags.values.each do |value| -%>
-
-    # def <%= value.name.downcase %>?: () -> bool
+    # :category: Flags
+    # <%= value.comment %>
+    #--
+    #: () -> bool
     def <%= value.name.downcase %>?
       flags.anybits?(<%= node_flags.name %>::<%= value.name %>)
     end
+
     <%- end -%>
     <%- end -%>
     <%- node.fields.each do |field| -%>
-
+    <%- case field -%>
+    <%- when Prism::Template::LocationField -%>
+    # :category: Locations
+    # :call-seq:
+    #   <%= field.name %> -> <%= field.call_seq_type %>
+    #
     <%- if field.comment.nil? -%>
-    # attr_reader <%= field.name %>: <%= field.rbs_class %>
+    # Returns the Location represented by `<%= field.name %>`.
     <%- else -%>
     <%- field.each_comment_line do |line| -%>
     #<%= line %>
     <%- end -%>
     <%- end -%>
-    <%- case field -%>
-    <%- when Prism::Template::LocationField -%>
+    #--
+    #: () -> Location
     def <%= field.name %>
       location = @<%= field.name %>
       return location if location.is_a?(Location)
       @<%= field.name %> = Location.new(source, location >> 32, location & 0xFFFFFFFF)
     end
 
+    # :category: Repository
     # Save the <%= field.name %> location using the given saved source so that
     # it can be retrieved later.
+    #--
+    #: (_Repository repository) -> Relocation::Entry
     def save_<%= field.name %>(repository)
       repository.enter(node_id, :<%= field.name %>)
     end
+
     <%- when Prism::Template::OptionalLocationField -%>
+    # :category: Locations
+    # :call-seq:
+    #   <%= field.name %> -> <%= field.call_seq_type %>
+    #
+    <%- if field.comment.nil? -%>
+    # Returns the Location represented by `<%= field.name %>`.
+    <%- else -%>
+    <%- field.each_comment_line do |line| -%>
+    #<%= line %>
+    <%- end -%>
+    <%- end -%>
+    #--
+    #: () -> Location?
     def <%= field.name %>
       location = @<%= field.name %>
       case location
@@ -426,54 +644,69 @@ module Prism
       end
     end
 
+    # :category: Repository
     # Save the <%= field.name %> location using the given saved source so that
     # it can be retrieved later.
+    #--
+    #: (_Repository repository) -> Relocation::Entry?
     def save_<%= field.name %>(repository)
       repository.enter(node_id, :<%= field.name %>) unless @<%= field.name %>.nil?
     end
     <%- else -%>
-    attr_reader :<%= field.name %>
+    # :call-seq:
+    #   <%= field.name %> -> <%= field.call_seq_type %>
+    #
+    <%- if field.comment.nil? -%>
+    # Returns the `<%= field.name %>` attribute.
+    <%- else -%>
+    <%- field.each_comment_line do |line| -%>
+    #<%= line %>
     <%- end -%>
     <%- end -%>
+    #--
+    #: () -> <%= field.rbs_class %>
+    def <%= field.name %>
+      @<%= field.name %>
+    end
+
+    <%- end -%>
+    <%- end -%>
+    # :section: Slicing
+
     <%- node.fields.each do |field| -%>
     <%- case field -%>
     <%- when Prism::Template::LocationField -%>
     <%- raise unless field.name.end_with?("_loc") -%>
     <%- next if node.fields.any? { |other| other.name == field.name.delete_suffix("_loc") } -%>
-
-    # def <%= field.name.delete_suffix("_loc") %>: () -> String
+    # :call-seq:
+    #   <%= field.name.delete_suffix("_loc") %> -> String
+    #
+    # Slice the location of <%= field.name %> from the source.
+    #--
+    #: () -> String
     def <%= field.name.delete_suffix("_loc") %>
       <%= field.name %>.slice
     end
+
     <%- when Prism::Template::OptionalLocationField -%>
     <%- raise unless field.name.end_with?("_loc") -%>
     <%- next if node.fields.any? { |other| other.name == field.name.delete_suffix("_loc") } -%>
-
-    # def <%= field.name.delete_suffix("_loc") %>: () -> String?
+    # :call-seq:
+    #   <%= field.name.delete_suffix("_loc") %> -> String | nil
+    #
+    # Slice the location of <%= field.name %> from the source.
+    #--
+    #: () -> String?
     def <%= field.name.delete_suffix("_loc") %>
       <%= field.name %>&.slice
     end
+
     <%- end -%>
     <%- end -%>
+    # :section:
 
-    # def inspect -> String
-    def inspect
-      InspectVisitor.compose(self)
-    end
-
-    # Return a symbol representation of this node type. See `Node#type`.
-    def type
-      :<%= node.human %>
-    end
-
-    # Return a symbol representation of this node type. See `Node::type`.
-    def self.type
-      :<%= node.human %>
-    end
-
-    # Implements case-equality for the node. This is effectively == but without
-    # comparing the value of locations. Locations are checked only for presence.
-    def ===(other)
+    #: (untyped other) -> boolish
+    def ===(other) # :nodoc:
       other.is_a?(<%= node.name %>)<%= " &&" if (fields = [*node.flags, *node.fields]).any? %>
         <%- fields.each_with_index do |field, index| -%>
         <%- if field.is_a?(Prism::Template::LocationField) || field.is_a?(Prism::Template::OptionalLocationField) -%>
diff --git a/prism/templates/lib/prism/reflection.rb.erb b/prism/templates/lib/prism/reflection.rb.erb
index 6c8b2f4d25..0012f120b2 100644
--- a/prism/templates/lib/prism/reflection.rb.erb
+++ b/prism/templates/lib/prism/reflection.rb.erb
@@ -1,3 +1,6 @@
+#--
+# rbs_inline: enabled
+
 module Prism
   # The Reflection module provides the ability to reflect on the structure of
   # the syntax tree itself, as opposed to looking at a single syntax tree. This
@@ -7,9 +10,11 @@ module Prism
     # for all other field types.
     class Field
       # The name of the field.
-      attr_reader :name
+      attr_reader :name #: Symbol
 
       # Initializes the field with the given name.
+      #--
+      #: (Symbol name) -> void
       def initialize(name)
         @name = name
       end
@@ -83,9 +88,11 @@ module Prism
     # the bitset should be accessed through their query methods.
     class FlagsField < Field
       # The names of the flags in the bitset.
-      attr_reader :flags
+      attr_reader :flags #: Array[Symbol]
 
       # Initializes the flags field with the given name and flags.
+      #--
+      #: (Symbol name, Array[Symbol] flags) -> void
       def initialize(name, flags)
         super(name)
         @flags = flags
@@ -93,6 +100,8 @@ module Prism
     end
 
     # Returns the fields for the given node.
+    #--
+    #: (singleton(Node) node) -> Array[Field]
     def self.fields_for(node)
       case node.type
       <%- nodes.each do |node| -%>
diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb
index 104b60f484..a676f957af 100644
--- a/prism/templates/lib/prism/serialize.rb.erb
+++ b/prism/templates/lib/prism/serialize.rb.erb
@@ -1,16 +1,19 @@
+#--
+# rbs_inline: enabled
+
 require "stringio"
 require_relative "polyfill/unpack1"
 
 module Prism
   # A module responsible for deserializing parse results.
-  module Serialize
+  module Serialize # :nodoc:
     # The major version of prism that we are expecting to find in the serialized
     # strings.
     MAJOR_VERSION = 1
 
     # The minor version of prism that we are expecting to find in the serialized
     # strings.
-    MINOR_VERSION = 4
+    MINOR_VERSION = 9
 
     # The patch version of prism that we are expecting to find in the serialized
     # strings.
@@ -20,9 +23,11 @@ module Prism
     #
     # The formatting of the source of this method is purposeful to illustrate
     # the structure of the serialized data.
+    #--
+    #: (String input, String serialized, bool freeze) -> ParseResult
     def self.load_parse(input, serialized, freeze)
       input = input.dup
-      source = Source.for(input)
+      source = Source.for(input, 1, [])
       loader = Loader.new(source, serialized)
 
                        loader.load_header
@@ -38,16 +43,17 @@ module Prism
       data_loc =       loader.load_optional_location_object(freeze)
       errors =         loader.load_errors(encoding, freeze)
       warnings =       loader.load_warnings(encoding, freeze)
+      continuable =    loader.load_bool
       cpool_base =     loader.load_uint32
       cpool_size =     loader.load_varuint
 
-      constant_pool = ConstantPool.new(input, serialized, cpool_base, cpool_size)
+      constant_pool = ConstantPool.new(serialized, cpool_base, cpool_size)
 
-      node =           loader.load_node(constant_pool, encoding, freeze)
+      node =           loader.load_node(constant_pool, encoding, freeze) #: ProgramNode
                        loader.load_constant_pool(constant_pool)
       raise unless     loader.eof?
 
-      result = ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, source)
+      result = ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, continuable, source)
       result.freeze if freeze
 
       input.force_encoding(encoding)
@@ -73,8 +79,10 @@ module Prism
     #
     # The formatting of the source of this method is purposeful to illustrate
     # the structure of the serialized data.
+    #--
+    #: (String input, String serialized, bool freeze) -> LexResult
     def self.load_lex(input, serialized, freeze)
-      source = Source.for(input)
+      source = Source.for(input, 1, [])
       loader = Loader.new(source, serialized)
 
       tokens =         loader.load_tokens
@@ -90,9 +98,10 @@ module Prism
       data_loc =       loader.load_optional_location_object(freeze)
       errors =         loader.load_errors(encoding, freeze)
       warnings =       loader.load_warnings(encoding, freeze)
+      continuable =    loader.load_bool
       raise unless     loader.eof?
 
-      result = LexResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, source)
+      result = LexResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, continuable, source)
 
       tokens.each do |token|
         token[0].value.force_encoding(encoding)
@@ -117,8 +126,10 @@ module Prism
     #
     # The formatting of the source of this method is purposeful to illustrate
     # the structure of the serialized data.
+    #--
+    #: (String input, String serialized, bool freeze) -> Array[Comment]
     def self.load_parse_comments(input, serialized, freeze)
-      source = Source.for(input)
+      source = Source.for(input, 1, [])
       loader = Loader.new(source, serialized)
 
                    loader.load_header
@@ -139,8 +150,10 @@ module Prism
     #
     # The formatting of the source of this method is purposeful to illustrate
     # the structure of the serialized data.
+    #--
+    #: (String input, String serialized, bool freeze) -> ParseLexResult
     def self.load_parse_lex(input, serialized, freeze)
-      source = Source.for(input)
+      source = Source.for(input, 1, [])
       loader = Loader.new(source, serialized)
 
       tokens =         loader.load_tokens
@@ -157,17 +170,18 @@ module Prism
       data_loc =       loader.load_optional_location_object(freeze)
       errors =         loader.load_errors(encoding, freeze)
       warnings =       loader.load_warnings(encoding, freeze)
+      continuable =    loader.load_bool
       cpool_base =     loader.load_uint32
       cpool_size =     loader.load_varuint
 
-      constant_pool = ConstantPool.new(input, serialized, cpool_base, cpool_size)
+      constant_pool = ConstantPool.new(serialized, cpool_base, cpool_size)
 
-      node =           loader.load_node(constant_pool, encoding, freeze)
+      node =           loader.load_node(constant_pool, encoding, freeze) #: ProgramNode
                        loader.load_constant_pool(constant_pool)
       raise unless     loader.eof?
 
-      value = [node, tokens]
-      result = ParseLexResult.new(value, comments, magic_comments, data_loc, errors, warnings, source)
+      value = [node, tokens] #: [ProgramNode, Array[[Token, Integer]]]
+      result = ParseLexResult.new(value, comments, magic_comments, data_loc, errors, warnings, continuable, source)
 
       tokens.each do |token|
         token[0].value.force_encoding(encoding)
@@ -189,34 +203,36 @@ module Prism
     end
 
     class ConstantPool # :nodoc:
-      attr_reader :size
+      attr_reader :size #: Integer
+
+      # @rbs @serialized: String
+      # @rbs @base: Integer
+      # @rbs @pool: Array[Symbol?]
 
-      def initialize(input, serialized, base, size)
-        @input = input
+      #: (String serialized, Integer base, Integer size) -> void
+      def initialize(serialized, base, size)
         @serialized = serialized
         @base = base
         @size = size
         @pool = Array.new(size, nil)
       end
 
+      #: (Integer index, Encoding encoding) -> Symbol
       def get(index, encoding)
         @pool[index] ||=
           begin
             offset = @base + index * 8
-            start = @serialized.unpack1("L", offset: offset)
-            length = @serialized.unpack1("L", offset: offset + 4)
+            start = @serialized.unpack1("L", offset: offset) #: Integer
+            length = @serialized.unpack1("L", offset: offset + 4) #: Integer
 
-            if start.nobits?(1 << 31)
-              @input.byteslice(start, length).force_encoding(encoding).to_sym
-            else
-              @serialized.byteslice(start & ((1 << 31) - 1), length).force_encoding(encoding).to_sym
-            end
+            (@serialized.byteslice(start, length) or raise).force_encoding(encoding).to_sym
           end
       end
     end
 
     if RUBY_ENGINE == "truffleruby"
       # StringIO is synchronized and that adds a high overhead on TruffleRuby.
+      # @rbs skip
       class FastStringIO # :nodoc:
         attr_accessor :pos
 
@@ -246,8 +262,11 @@ module Prism
     end
 
     class Loader # :nodoc:
-      attr_reader :input, :io, :source
+      attr_reader :input #: String
+      attr_reader :io #: StringIO
+      attr_reader :source #: Source
 
+      #: (Source source, String serialized) -> void
       def initialize(source, serialized)
         @input = source.source.dup
         raise unless serialized.encoding == Encoding::BINARY
@@ -256,40 +275,46 @@ module Prism
         define_load_node_lambdas if RUBY_ENGINE != "ruby"
       end
 
+      #: () -> bool
       def eof?
         io.getbyte
         io.eof?
       end
 
+      #: (ConstantPool constant_pool) -> void
       def load_constant_pool(constant_pool)
         trailer = 0
 
         constant_pool.size.times do |index|
-          start, length = io.read(8).unpack("L2")
-          trailer += length if start.anybits?(1 << 31)
+          length = (io.read(8) or raise).unpack1("L", offset: 4) #: Integer
+          trailer += length
         end
 
         io.read(trailer)
       end
 
+      #: () -> void
       def load_header
         raise "Invalid serialization" if io.read(5) != "PRISM"
-        raise "Invalid serialization" if io.read(3).unpack("C3") != [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION]
+        raise "Invalid serialization" if (io.read(3) or raise).unpack("C3") != [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION]
         raise "Invalid serialization (location fields must be included but are not)" if io.getbyte != 0
       end
 
+      #: () -> Encoding
       def load_encoding
-        encoding = Encoding.find(io.read(load_varuint))
+        encoding = Encoding.find((io.read(load_varuint) or raise)) or raise
         @input = input.force_encoding(encoding).freeze
         encoding
       end
 
+      #: (bool freeze) -> Array[Integer]
       def load_line_offsets(freeze)
         offsets = Array.new(load_varuint) { load_varuint }
         offsets.freeze if freeze
         offsets
       end
 
+      #: (bool freeze) -> Array[Comment]
       def load_comments(freeze)
         comments =
           Array.new(load_varuint) do
@@ -297,6 +322,7 @@ module Prism
               case load_varuint
               when 0 then InlineComment.new(load_location_object(freeze))
               when 1 then EmbDocComment.new(load_location_object(freeze))
+              else raise
               end
 
             comment.freeze if freeze
@@ -307,6 +333,7 @@ module Prism
         comments
       end
 
+      #: (bool freeze) -> Array[MagicComment]
       def load_magic_comments(freeze)
         magic_comments =
           Array.new(load_varuint) do
@@ -331,10 +358,11 @@ module Prism
         <%- warnings.each do |warning| -%>
         <%= warning.name.downcase.to_sym.inspect %>,
         <%- end -%>
-      ].freeze
+      ].freeze #: Array[Symbol]
 
       private_constant :DIAGNOSTIC_TYPES
 
+      #: () -> Symbol
       def load_error_level
         level = io.getbyte
 
@@ -350,13 +378,14 @@ module Prism
         end
       end
 
+      #: (Encoding encoding, bool freeze) -> Array[ParseError]
       def load_errors(encoding, freeze)
         errors =
           Array.new(load_varuint) do
             error =
               ParseError.new(
                 DIAGNOSTIC_TYPES.fetch(load_varuint),
-                load_embedded_string(encoding),
+                load_string(encoding),
                 load_location_object(freeze),
                 load_error_level
               )
@@ -369,6 +398,7 @@ module Prism
         errors
       end
 
+      #: () -> Symbol
       def load_warning_level
         level = io.getbyte
 
@@ -382,13 +412,14 @@ module Prism
         end
       end
 
+      #: (Encoding encoding, bool freeze) -> Array[ParseWarning]
       def load_warnings(encoding, freeze)
         warnings =
           Array.new(load_varuint) do
             warning =
               ParseWarning.new(
                 DIAGNOSTIC_TYPES.fetch(load_varuint),
-                load_embedded_string(encoding),
+                load_string(encoding),
                 load_location_object(freeze),
                 load_warning_level
               )
@@ -401,15 +432,15 @@ module Prism
         warnings
       end
 
+      #: () -> Array[[Token, Integer]]
       def load_tokens
-        tokens = []
+        tokens = [] #: Array[[Token, Integer]]
 
         while (type = TOKEN_TYPES.fetch(load_varuint))
-          start = load_varuint
-          length = load_varuint
+          location = load_location_object(false)
+
           lex_state = load_varuint
 
-          location = Location.new(@source, start, length)
           token = Token.new(@source, type, location.slice, location)
 
           tokens << [token, lex_state]
@@ -420,25 +451,29 @@ module Prism
 
       # variable-length integer using https://en.wikipedia.org/wiki/LEB128
       # This is also what protobuf uses: https://protobuf.dev/programming-guides/encoding/#varints
+      #--
+      #: () -> Integer
       def load_varuint
-        n = io.getbyte
+        n = (io.getbyte or raise)
         if n < 128
           n
         else
           n -= 128
           shift = 0
-          while (b = io.getbyte) >= 128
+          while (b = (io.getbyte or raise)) >= 128
             n += (b - 128) << (shift += 7)
           end
           n + (b << (shift + 7))
         end
       end
 
+      #: () -> Integer
       def load_varsint
         n = load_varuint
         (n >> 1) ^ (-(n & 1))
       end
 
+      #: () -> Integer
       def load_integer
         negative = io.getbyte != 0
         length = load_varuint
@@ -450,14 +485,22 @@ module Prism
         value
       end
 
+      #: () -> Float
       def load_double
-        io.read(8).unpack1("D")
+        (io.read(8) or raise).unpack1("D") #: Float
       end
 
+      #: () -> bool
+      def load_bool
+        (io.getbyte or raise) != 0
+      end
+
+      #: () -> Integer
       def load_uint32
-        io.read(4).unpack1("L")
+        (io.read(4) or raise).unpack1("L") #: Integer
       end
 
+      #: (ConstantPool constant_pool, Encoding encoding, bool freeze) -> node?
       def load_optional_node(constant_pool, encoding, freeze)
         if io.getbyte != 0
           io.pos -= 1
@@ -465,90 +508,121 @@ module Prism
         end
       end
 
-      def load_embedded_string(encoding)
-        io.read(load_varuint).force_encoding(encoding).freeze
-      end
-
+      #: (Encoding encoding) -> String
       def load_string(encoding)
-        case (type = io.getbyte)
-        when 1
-          input.byteslice(load_varuint, load_varuint).force_encoding(encoding).freeze
-        when 2
-          load_embedded_string(encoding)
-        else
-          raise "Unknown serialized string type: #{type}"
-        end
+        (io.read(load_varuint) or raise).force_encoding(encoding).freeze
       end
 
+      #: (bool freeze) -> Location
       def load_location_object(freeze)
         location = Location.new(source, load_varuint, load_varuint)
         location.freeze if freeze
         location
       end
 
+      # Load a location from the serialized data. Note that the signature is
+      # not strictly accurate: when `freeze` is false, the location is returned
+      # as a packed integer (`(start << 32) | length`) instead of an object.
+      #--
+      #: (bool freeze) -> Location
       def load_location(freeze)
         return load_location_object(freeze) if freeze
-        (load_varuint << 32) | load_varuint
+        (load_varuint << 32) | load_varuint #: Location
       end
 
+      # Load an optional location from the serialized data if it is present.
+      # Note that the signature is not strictly accurate: when `freeze` is
+      # false, the location is returned as a packed integer, not an object.
+      #--
+      #: (bool freeze) -> Location?
       def load_optional_location(freeze)
         load_location(freeze) if io.getbyte != 0
       end
 
+      #: (bool freeze) -> Location?
       def load_optional_location_object(freeze)
         load_location_object(freeze) if io.getbyte != 0
       end
 
+      #: (ConstantPool constant_pool, Encoding encoding) -> Symbol
       def load_constant(constant_pool, encoding)
         index = load_varuint
         constant_pool.get(index - 1, encoding)
       end
 
+      #: (ConstantPool constant_pool, Encoding encoding) -> Symbol?
       def load_optional_constant(constant_pool, encoding)
         index = load_varuint
         constant_pool.get(index - 1, encoding) if index != 0
       end
 
       if RUBY_ENGINE == "ruby"
+        #: (ConstantPool constant_pool, Encoding encoding, bool freeze) -> node
         def load_node(constant_pool, encoding, freeze)
           type = io.getbyte
           node_id = load_varuint
-          location = load_location(freeze)
-          value = case type
-          <%- nodes.each_with_index do |node, index| -%>
-          when <%= index + 1 %> then
-            <%- if node.needs_serialized_length? -%>
-            load_uint32
-            <%- end -%>
-            <%= node.name %>.new(<%= ["source", "node_id", "location", "load_varuint", *node.fields.map { |field|
-              case field
-              when Prism::Template::NodeField then "load_node(constant_pool, encoding, freeze)"
-              when Prism::Template::OptionalNodeField then "load_optional_node(constant_pool, encoding, freeze)"
-              when Prism::Template::StringField then "load_string(encoding)"
-              when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node(constant_pool, encoding, freeze) }.tap { |nodes| nodes.freeze if freeze }"
-              when Prism::Template::ConstantField then "load_constant(constant_pool, encoding)"
-              when Prism::Template::OptionalConstantField then "load_optional_constant(constant_pool, encoding)"
-              when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_constant(constant_pool, encoding) }.tap { |constants| constants.freeze if freeze }"
-              when Prism::Template::LocationField then "load_location(freeze)"
-              when Prism::Template::OptionalLocationField then "load_optional_location(freeze)"
-              when Prism::Template::UInt8Field then "io.getbyte"
-              when Prism::Template::UInt32Field then "load_varuint"
-              when Prism::Template::IntegerField then "load_integer"
-              when Prism::Template::DoubleField then "load_double"
-              else raise
-              end
-            }].join(", ") -%>)
+          location = load_location(freeze) #: Location
+          value =
+            case type
+            <%- nodes.each_with_index do |node, index| -%>
+            when <%= index + 1 %>
+              <%- if node.needs_serialized_length? -%>
+              load_uint32
+              <%- end -%>
+              <%= node.name %>.new(
+                source,
+                node_id,
+                location,
+                load_varuint,
+                <%- node.fields.each do |field| -%>
+                <%- case field -%>
+                <%- when Prism::Template::NodeField -%>
+                load_node(constant_pool, encoding, freeze), #: <%= field.rbs_class %>
+                <%- when Prism::Template::OptionalNodeField -%>
+                load_optional_node(constant_pool, encoding, freeze), #: <%= field.rbs_class %>
+                <%- when Prism::Template::StringField -%>
+                load_string(encoding),
+                <%- when Prism::Template::NodeListField -%>
+                Array.new(load_varuint) do
+                  load_node(constant_pool, encoding, freeze) #: <%= field.element_rbs_class %>
+                end.tap { |nodes| nodes.freeze if freeze },
+                <%- when Prism::Template::ConstantField -%>
+                load_constant(constant_pool, encoding),
+                <%- when Prism::Template::OptionalConstantField -%>
+                load_optional_constant(constant_pool, encoding),
+                <%- when Prism::Template::ConstantListField -%>
+                Array.new(load_varuint) { load_constant(constant_pool, encoding) }.tap { |constants| constants.freeze if freeze },
+                <%- when Prism::Template::LocationField -%>
+                load_location(freeze),
+                <%- when Prism::Template::OptionalLocationField -%>
+                load_optional_location(freeze),
+                <%- when Prism::Template::UInt8Field -%>
+                (io.getbyte or raise),
+                <%- when Prism::Template::UInt32Field -%>
+                load_varuint,
+                <%- when Prism::Template::IntegerField -%>
+                load_integer,
+                <%- when Prism::Template::DoubleField -%>
+                load_double,
+                <%- else raise -%>
+                <%- end -%>
+                <%- end -%>
+              )
             <%- end -%>
-          end
+            else
+              raise "Unknown node type: #{type}"
+            end
 
           value.freeze if freeze
           value
         end
       else
+        # @rbs skip
         def load_node(constant_pool, encoding, freeze)
-          @load_node_lambdas[io.getbyte].call(constant_pool, encoding, freeze)
+          @load_node_lambdas[(io.getbyte or raise)].call(constant_pool, encoding, freeze)
         end
 
+        # @rbs skip
         def define_load_node_lambdas
           @load_node_lambdas = [
             nil,
@@ -559,24 +633,46 @@ module Prism
               <%- if node.needs_serialized_length? -%>
               load_uint32
               <%- end -%>
-              value = <%= node.name %>.new(<%= ["source", "node_id", "location", "load_varuint", *node.fields.map { |field|
-                case field
-                when Prism::Template::NodeField then "load_node(constant_pool, encoding, freeze)"
-                when Prism::Template::OptionalNodeField then "load_optional_node(constant_pool, encoding, freeze)"
-                when Prism::Template::StringField then "load_string(encoding)"
-                when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node(constant_pool, encoding, freeze) }"
-                when Prism::Template::ConstantField then "load_constant(constant_pool, encoding)"
-                when Prism::Template::OptionalConstantField then "load_optional_constant(constant_pool, encoding)"
-                when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_constant(constant_pool, encoding) }"
-                when Prism::Template::LocationField then "load_location(freeze)"
-                when Prism::Template::OptionalLocationField then "load_optional_location(freeze)"
-                when Prism::Template::UInt8Field then "io.getbyte"
-                when Prism::Template::UInt32Field then "load_varuint"
-                when Prism::Template::IntegerField then "load_integer"
-                when Prism::Template::DoubleField then "load_double"
-                else raise
-                end
-              }].join(", ") -%>)
+              value =
+                <%= node.name %>.new(
+                  source,
+                  node_id,
+                  location,
+                  load_varuint,
+                  <%- node.fields.map do |field| -%>
+                  <%- case field -%>
+                  <%- when Prism::Template::NodeField -%>
+                  load_node(constant_pool, encoding, freeze), #: <%= field.rbs_class %>
+                  <%- when Prism::Template::OptionalNodeField -%>
+                  load_optional_node(constant_pool, encoding, freeze), #: <%= field.rbs_class %>
+                  <%- when Prism::Template::StringField -%>
+                  load_string(encoding),
+                  <%- when Prism::Template::NodeListField -%>
+                  Array.new(load_varuint) do
+                    load_node(constant_pool, encoding, freeze) #: <%= field.element_rbs_class %>
+                  end,
+                  <%- when Prism::Template::ConstantField -%>
+                  load_constant(constant_pool, encoding),
+                  <%- when Prism::Template::OptionalConstantField -%>
+                  load_optional_constant(constant_pool, encoding),
+                  <%- when Prism::Template::ConstantListField -%>
+                  Array.new(load_varuint) { load_constant(constant_pool, encoding) },
+                  <%- when Prism::Template::LocationField -%>
+                  load_location(freeze),
+                  <%- when Prism::Template::OptionalLocationField -%>
+                  load_optional_location(freeze),
+                  <%- when Prism::Template::UInt8Field -%>
+                  (io.getbyte or raise),
+                  <%- when Prism::Template::UInt32Field -%>
+                  load_varuint,
+                  <%- when Prism::Template::IntegerField -%>
+                  load_integer,
+                  <%- when Prism::Template::DoubleField -%>
+                  load_double,
+                  <%- else raise -%>
+                  <%- end -%>
+                  <%- end -%>
+                )
               value.freeze if freeze
               value
             },
@@ -584,6 +680,10 @@ module Prism
           ]
         end
       end
+
+      # @rbs!
+      #   @load_node_lambdas: Array[Proc]
+      #   def define_load_node_lambdas: () -> void
     end
 
     # The token types that can be indexed by their enum values.
@@ -592,7 +692,7 @@ module Prism
       <%- tokens.each do |token| -%>
       <%= token.name.to_sym.inspect %>,
       <%- end -%>
-    ].freeze
+    ].freeze #: Array[Symbol?]
 
     private_constant :MAJOR_VERSION, :MINOR_VERSION, :PATCH_VERSION
     private_constant :ConstantPool, :FastStringIO, :Loader, :TOKEN_TYPES
diff --git a/prism/templates/lib/prism/visitor.rb.erb b/prism/templates/lib/prism/visitor.rb.erb
index 4b30a1815b..f23e87d99e 100644
--- a/prism/templates/lib/prism/visitor.rb.erb
+++ b/prism/templates/lib/prism/visitor.rb.erb
@@ -1,4 +1,14 @@
+#--
+# rbs_inline: enabled
+
 module Prism
+  # @rbs!
+  #    interface _Visitor
+  #      <% nodes.each do |node| %>
+  #      def visit_<%= node.human %>: (<%= node.name %>) -> void
+  #      <% end %>
+  #    end
+
   # A class that knows how to walk down the tree. None of the individual visit
   # methods are implemented on this visitor, so it forces the consumer to
   # implement each one that they need. For a default implementation that
@@ -6,21 +16,27 @@ module Prism
   class BasicVisitor
     # Calls `accept` on the given node if it is not `nil`, which in turn should
     # call back into this visitor by calling the appropriate `visit_*` method.
+    #--
+    #: (node? node) -> void
     def visit(node)
       # @type self: _Visitor
       node&.accept(self)
     end
 
     # Visits each node in `nodes` by calling `accept` on each one.
+    #--
+    #: (Array[node?] nodes) -> void
     def visit_all(nodes)
       # @type self: _Visitor
       nodes.each { |node| node&.accept(self) }
     end
 
     # Visits the child nodes of `node` by calling `accept` on each one.
+    #--
+    #: (node node) -> void
     def visit_child_nodes(node)
       # @type self: _Visitor
-      node.compact_child_nodes.each { |node| node.accept(self) }
+      node.each_child_node { |node| node.accept(self) }
     end
   end
 
@@ -34,7 +50,7 @@ module Prism
   #
   #     class FooCalls < Prism::Visitor
   #       def visit_call_node(node)
-  #         if node.name == "foo"
+  #         if node.name == :foo
   #           # Do something with the node
   #         end
   #
@@ -47,7 +63,11 @@ module Prism
     <%- nodes.each_with_index do |node, index| -%>
 <%= "\n" if index != 0 -%>
     # Visit a <%= node.name %> node
-    alias visit_<%= node.human %> visit_child_nodes
+    #--
+    #: (<%= node.name %> node) -> void
+    def visit_<%= node.human %>(node)
+      node.each_child_node { |node| node.accept(self) }
+    end
     <%- end -%>
   end
 end
diff --git a/prism/templates/src/diagnostic.c.erb b/prism/templates/src/diagnostic.c.erb
index ce98dc5acd..0dea732869 100644
--- a/prism/templates/src/diagnostic.c.erb
+++ b/prism/templates/src/diagnostic.c.erb
@@ -1,4 +1,16 @@
-#include "prism/diagnostic.h"
+#include "prism/internal/diagnostic.h"
+
+#include "prism/compiler/inline.h"
+
+#include "prism/internal/allocator.h"
+#include "prism/internal/arena.h"
+#include "prism/internal/list.h"
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
 
 #define PM_DIAGNOSTIC_ID_MAX <%= errors.length + warnings.length %>
 
@@ -75,16 +87,16 @@ typedef struct {
  * * `PM_WARNING_LEVEL_VERBOSE` - Warnings that appear with `-w`, as in `ruby -w -c -e 'code'`.
  */
 static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
-    // Special error that can be replaced
+    /* Special error that can be replaced */
     [PM_ERR_CANNOT_PARSE_EXPRESSION]            = { "cannot parse the expression", PM_ERROR_LEVEL_SYNTAX },
 
-    // Errors that should raise argument errors
+    /* Errors that should raise argument errors */
     [PM_ERR_INVALID_ENCODING_MAGIC_COMMENT]     = { "unknown or invalid encoding in the magic comment", PM_ERROR_LEVEL_ARGUMENT },
 
-    // Errors that should raise load errors
+    /* Errors that should raise load errors */
     [PM_ERR_SCRIPT_NOT_FOUND]                   = { "no Ruby script found in input", PM_ERROR_LEVEL_LOAD },
 
-    // Errors that should raise syntax errors
+    /* Errors that should raise syntax errors */
     [PM_ERR_ALIAS_ARGUMENT]                     = { "invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE]  = { "invalid argument being passed to `alias`; can't make alias for the number variables", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_AMPAMPEQ_MULTI_ASSIGN]              = { "unexpected `&&=` in a multiple assignment", PM_ERROR_LEVEL_SYNTAX },
@@ -102,6 +114,8 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
     [PM_ERR_ARGUMENT_FORWARDING_UNBOUND]        = { "unexpected `...` in an non-parenthesized call", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND]   = { "unexpected `&`; no anonymous block parameter", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES]    = { "unexpected ... when the parent method is not forwarding", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_LAMBDA] = { "unexpected ... in lambda argument", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_BLOCK]  = { "unexpected ... in block argument", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_ARGUMENT_NO_FORWARDING_STAR]        = { "unexpected `*`; no anonymous rest parameter", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR]   = { "unexpected `**`; no anonymous keyword rest parameter", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT]   = { "unexpected `*` splat argument after a `**` keyword splat argument", PM_ERROR_LEVEL_SYNTAX },
@@ -144,7 +158,9 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
     [PM_ERR_CONDITIONAL_WHILE_PREDICATE]        = { "expected a predicate expression for the `while` statement", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT] = { "expected a constant after the `::` operator", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_DEF_ENDLESS]                        = { "could not parse the endless method body", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_DEF_ENDLESS_PARAMETERS]             = { "could not parse the endless method parameters", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_DEF_ENDLESS_SETTER]                 = { "invalid method name; a setter method cannot be defined in an endless method definition", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_DEF_ENDLESS_DO_BLOCK]               = { "unexpected `do` for block in an endless method definition", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_DEF_NAME]                           = { "unexpected %s; expected a method name", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_DEF_PARAMS_TERM]                    = { "expected a delimiter to close the parameters", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_DEF_PARAMS_TERM_PAREN]              = { "unexpected %s; expected a `)` to close the parameters", PM_ERROR_LEVEL_SYNTAX },
@@ -184,6 +200,8 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
     [PM_ERR_EXPECT_FOR_DELIMITER]               = { "unexpected %s; expected a 'do', newline, or ';' after the 'for' loop collection", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_EXPECT_IDENT_REQ_PARAMETER]         = { "expected an identifier for the required parameter", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_EXPECT_IN_DELIMITER]                = { "expected a delimiter after the patterns of an `in` clause", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN]     = { "expected a `(` immediately after `not`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER]      = { "expected a `(` after `not`", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_EXPECT_LPAREN_REQ_PARAMETER]        = { "expected a `(` to start a required parameter", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_EXPECT_MESSAGE]                     = { "unexpected %s; expecting a message to send to the receiver", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_EXPECT_RBRACKET]                    = { "expected a matching `]`", PM_ERROR_LEVEL_SYNTAX },
@@ -298,6 +316,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
     [PM_ERR_PARAMETER_UNEXPECTED_NO_KW]         = { "unexpected **nil; no keywords marker disallowed after keywords", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS]       = { "unexpected multiple '*' rest patterns in an array pattern", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_PATTERN_CAPTURE_DUPLICATE]          = { "duplicated variable name", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE]     = { "variable capture in alternative pattern", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET]   = { "expected a pattern expression after the `[` operator", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA]     = { "expected a pattern expression after `,`", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET]   = { "expected a pattern expression after `=>`", PM_ERROR_LEVEL_SYNTAX },
@@ -323,13 +342,15 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
     [PM_ERR_PATTERN_TERM_PAREN]                 = { "expected a `)` to close the pattern expression", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN]            = { "unexpected `||=` in a multiple assignment", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH]    = { "regexp encoding option '%c' differs from source encoding '%s'", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_REGEXP_ESCAPED_NON_ASCII_IN_UTF8]   = { "escaped non ASCII character in UTF-8 regexp: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING]      = { "incompatible character encoding: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
-    [PM_ERR_REGEXP_NON_ESCAPED_MBC]             = { "/.../n has a non escaped non ASCII character in non ASCII-8BIT script: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_REGEXP_INVALID_CHAR_PROPERTY]       = { "invalid character property name {%.*s}: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_REGEXP_INVALID_UNICODE_RANGE]       = { "invalid Unicode range: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_REGEXP_NON_ESCAPED_MBC]             = { "/.../n has a non escaped non ASCII character in non ASCII-8BIT script: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_REGEXP_PARSE_ERROR]                 = { "%s", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_REGEXP_UNKNOWN_OPTIONS]             = { "unknown regexp %s - %.*s", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_REGEXP_TERM]                        = { "unterminated regexp meets end of file; expected a closing delimiter", PM_ERROR_LEVEL_SYNTAX },
-    [PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP]   = { "UTF-8 character in non UTF-8 regexp: /%s/", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP]   = { "UTF-8 character in non UTF-8 regexp: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_RESCUE_EXPRESSION]                  = { "expected a rescued expression", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_RESCUE_MODIFIER_VALUE]              = { "expected a value after the `rescue` modifier", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_RESCUE_TERM]                        = { "expected a closing delimiter for the `rescue` clause", PM_ERROR_LEVEL_SYNTAX },
@@ -344,7 +365,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
     [PM_ERR_STRING_INTERPOLATED_TERM]           = { "unterminated string; expected a closing delimiter for the interpolated string", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_STRING_LITERAL_EOF]                 = { "unterminated string meets end of file", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_STRING_LITERAL_TERM]                = { "unexpected %s, expected a string literal terminator", PM_ERROR_LEVEL_SYNTAX },
-    [PM_ERR_SYMBOL_INVALID]                     = { "invalid symbol", PM_ERROR_LEVEL_SYNTAX }, // TODO expected symbol? prism.c ~9719
+    [PM_ERR_SYMBOL_INVALID]                     = { "invalid symbol", PM_ERROR_LEVEL_SYNTAX }, /* TODO expected symbol? prism.c ~9719 */
     [PM_ERR_SYMBOL_TERM_DYNAMIC]                = { "unterminated quoted string; expected a closing delimiter for the dynamic symbol", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_SYMBOL_TERM_INTERPOLATED]           = { "unterminated symbol; expected a closing delimiter for the interpolated symbol", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_TERNARY_COLON]                      = { "expected a `:` after the true expression of a ternary operator", PM_ERROR_LEVEL_SYNTAX },
@@ -358,6 +379,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
     [PM_ERR_UNEXPECTED_INDEX_KEYWORDS]          = { "unexpected keyword arg given in index assignment; keywords are not allowed in index assignment expressions", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_UNEXPECTED_LABEL]                   = { "unexpected label", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_UNEXPECTED_MULTI_WRITE]             = { "unexpected multiple assignment; multiple assignment is not allowed in this context", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE] = { "unexpected %s; expected a default value for a parameter", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_UNEXPECTED_RANGE_OPERATOR]          = { "unexpected range operator; .. and ... are non-associative and cannot be chained", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_UNEXPECTED_SAFE_NAVIGATION]         = { "&. inside multiple assignment destination", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT]     = { "unexpected %s, assuming it is closing the parent %s", PM_ERROR_LEVEL_SYNTAX },
@@ -370,7 +392,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
     [PM_ERR_WRITE_TARGET_UNEXPECTED]            = { "unexpected write target", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_XSTRING_TERM]                       = { "expected a closing delimiter for the `%x` or backtick string", PM_ERROR_LEVEL_SYNTAX },
 
-    // Warnings
+    /* Warnings */
     [PM_WARN_AMBIGUOUS_BINARY_OPERATOR]         = { "'%s' after local variable or literal is interpreted as binary operator even though it seems like %s", PM_WARNING_LEVEL_VERBOSE },
     [PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS]    = { "ambiguous first argument; put parentheses or a space even after `-` operator", PM_WARNING_LEVEL_VERBOSE },
     [PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS]     = { "ambiguous first argument; put parentheses or a space even after `+` operator", PM_WARNING_LEVEL_VERBOSE },
@@ -406,8 +428,8 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
 /**
  * Get the human-readable name of the given diagnostic ID.
  */
-const char *
-pm_diagnostic_id_human(pm_diagnostic_id_t diag_id) {
+static const char *
+pm_diagnostic_id_name(pm_diagnostic_id_t diag_id) {
     switch (diag_id) {
         <%- errors.each do |error| -%>
         case PM_ERR_<%= error.name %>: return "<%= error.name.downcase %>";
@@ -421,8 +443,8 @@ pm_diagnostic_id_human(pm_diagnostic_id_t diag_id) {
     return "";
 }
 
-static inline const char *
-pm_diagnostic_message(pm_diagnostic_id_t diag_id) {
+static PRISM_INLINE const char *
+pm_diagnostic_id_message(pm_diagnostic_id_t diag_id) {
     assert(diag_id < PM_DIAGNOSTIC_ID_MAX);
 
     const char *message = diagnostic_messages[diag_id].message;
@@ -431,91 +453,102 @@ pm_diagnostic_message(pm_diagnostic_id_t diag_id) {
     return message;
 }
 
-static inline uint8_t
-pm_diagnostic_level(pm_diagnostic_id_t diag_id) {
+static PRISM_INLINE uint8_t
+pm_diagnostic_id_level(pm_diagnostic_id_t diag_id) {
     assert(diag_id < PM_DIAGNOSTIC_ID_MAX);
 
     return (uint8_t) diagnostic_messages[diag_id].level;
 }
 
 /**
+ * Get the type of the given diagnostic, i.e. the name of its diagnostic ID.
+ */
+const char *
+pm_diagnostic_type(const pm_diagnostic_t *diagnostic) {
+    return pm_diagnostic_id_name(diagnostic->diag_id);
+}
+
+/**
+ * Get the location of the given diagnostic.
+ */
+pm_location_t
+pm_diagnostic_location(const pm_diagnostic_t *diagnostic) {
+    return diagnostic->location;
+}
+
+/**
+ * Get the message of the given diagnostic.
+ */
+const char *
+pm_diagnostic_message(const pm_diagnostic_t *diagnostic) {
+    return diagnostic->message;
+}
+
+/**
+ * Get the level recorded for the given diagnostic's ID, as an error level.
+ */
+pm_error_level_t
+pm_diagnostic_error_level(const pm_diagnostic_t *diagnostic) {
+    return (pm_error_level_t) pm_diagnostic_id_level(diagnostic->diag_id);
+}
+
+/**
+ * Get the level recorded for the given diagnostic's ID, as a warning level.
+ */
+pm_warning_level_t
+pm_diagnostic_warning_level(const pm_diagnostic_t *diagnostic) {
+    return (pm_warning_level_t) pm_diagnostic_id_level(diagnostic->diag_id);
+}
+
+/**
  * Append an error to the given list of diagnostic.
  */
-bool
-pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
-    pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) xcalloc(1, sizeof(pm_diagnostic_t));
-    if (diagnostic == NULL) return false;
+void
+pm_diagnostic_list_append(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
+    pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) pm_arena_zalloc(arena, sizeof(pm_diagnostic_t), PRISM_ALIGNOF(pm_diagnostic_t));
 
     *diagnostic = (pm_diagnostic_t) {
-        .location = { start, end },
+        .location = { .start = start, .length = length },
         .diag_id = diag_id,
-        .message = pm_diagnostic_message(diag_id),
-        .owned = false,
-        .level = pm_diagnostic_level(diag_id)
+        .message = pm_diagnostic_id_message(diag_id),
+        .level = pm_diagnostic_id_level(diag_id)
     };
 
     pm_list_append(list, (pm_list_node_t *) diagnostic);
-    return true;
 }
 
 /**
  * Append a diagnostic to the given list of diagnostics that is using a format
  * string for its message.
  */
-bool
-pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...) {
+void
+pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...) {
     va_list arguments;
     va_start(arguments, diag_id);
 
-    const char *format = pm_diagnostic_message(diag_id);
+    const char *format = pm_diagnostic_id_message(diag_id);
     int result = vsnprintf(NULL, 0, format, arguments);
     va_end(arguments);
 
     if (result < 0) {
-        return false;
+        return;
     }
 
-    pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) xcalloc(1, sizeof(pm_diagnostic_t));
-    if (diagnostic == NULL) {
-        return false;
-    }
+    pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) pm_arena_zalloc(arena, sizeof(pm_diagnostic_t), PRISM_ALIGNOF(pm_diagnostic_t));
 
-    size_t length = (size_t) (result + 1);
-    char *message = (char *) xmalloc(length);
-    if (message == NULL) {
-        xfree(diagnostic);
-        return false;
-    }
+    size_t message_length = (size_t) (result + 1);
+    char *message = (char *) pm_arena_alloc(arena, message_length, 1);
 
     va_start(arguments, diag_id);
-    vsnprintf(message, length, format, arguments);
+    vsnprintf(message, message_length, format, arguments);
     va_end(arguments);
 
     *diagnostic = (pm_diagnostic_t) {
-        .location = { start, end },
+        .location = { .start = start, .length = length },
         .diag_id = diag_id,
         .message = message,
-        .owned = true,
-        .level = pm_diagnostic_level(diag_id)
+        .level = pm_diagnostic_id_level(diag_id)
     };
 
     pm_list_append(list, (pm_list_node_t *) diagnostic);
-    return true;
-}
-
-/**
- * Deallocate the internal state of the given diagnostic list.
- */
-void
-pm_diagnostic_list_free(pm_list_t *list) {
-    pm_diagnostic_t *node = (pm_diagnostic_t *) list->head;
-
-    while (node != NULL) {
-        pm_diagnostic_t *next = (pm_diagnostic_t *) node->node.next;
-
-        if (node->owned) xfree((void *) node->message);
-        xfree(node);
-
-        node = next;
-    }
 }
diff --git a/prism/templates/src/json.c.erb b/prism/templates/src/json.c.erb
new file mode 100644
index 0000000000..5c4ab8d92a
--- /dev/null
+++ b/prism/templates/src/json.c.erb
@@ -0,0 +1,130 @@
+#include "prism/json.h"
+
+// Ensure this translation unit is never empty, even when JSON is excluded.
+typedef int pm_json_unused_t;
+
+#ifndef PRISM_EXCLUDE_JSON
+
+#include "prism/internal/buffer.h"
+#include "prism/internal/constant_pool.h"
+#include "prism/internal/integer.h"
+#include "prism/internal/parser.h"
+
+#include <inttypes.h>
+
+static void
+pm_dump_json_constant(pm_buffer_t *buffer, const pm_parser_t *parser, pm_constant_id_t constant_id) {
+    const pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id);
+    pm_buffer_append_byte(buffer, '"');
+    pm_buffer_append_source(buffer, constant->start, constant->length, PM_BUFFER_ESCAPING_JSON);
+    pm_buffer_append_byte(buffer, '"');
+}
+
+static void
+pm_dump_json_location(pm_buffer_t *buffer, const pm_location_t *location) {
+    pm_buffer_append_format(buffer, "{\"start\":%" PRIu32 ",\"length\":%" PRIu32 "}", location->start, location->length);
+}
+
+/**
+ * Dump the given node, including its child nodes, as JSON to the given buffer.
+ */
+void
+pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) {
+    switch (PM_NODE_TYPE(node)) {
+        <%- nodes.each do |node| -%>
+        case <%= node.type %>: {
+            pm_buffer_append_string(buffer, "{\"type\":\"<%= node.name %>\",\"location\":", <%= node.name.bytesize + 22 %>);
+
+            const pm_<%= node.human %>_t *cast = (const pm_<%= node.human %>_t *) node;
+            pm_dump_json_location(buffer, &cast->base.location);
+            <%- [*node.flags, *node.fields].each_with_index do |field, index| -%>
+
+            // Dump the <%= field.name %> field
+            pm_buffer_append_byte(buffer, ',');
+            <%- if field.is_a?(Prism::Template::Flags) -%>
+            pm_buffer_append_string(buffer, "\"flags\":", 8);
+            <%- else -%>
+            pm_buffer_append_string(buffer, "\"<%= field.name %>\":", <%= field.name.bytesize + 3 %>);
+            <%- end -%>
+            <%- case field -%>
+            <%- when Prism::Template::NodeField -%>
+            pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
+            <%- when Prism::Template::OptionalNodeField -%>
+            if (cast-><%= field.name %> != NULL) {
+                pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
+            } else {
+                pm_buffer_append_string(buffer, "null", 4);
+            }
+            <%- when Prism::Template::NodeListField -%>
+            const pm_node_list_t *<%= field.name %> = &cast-><%= field.name %>;
+            pm_buffer_append_byte(buffer, '[');
+
+            for (size_t index = 0; index < <%= field.name %>->size; index++) {
+                if (index != 0) pm_buffer_append_byte(buffer, ',');
+                pm_dump_json(buffer, parser, <%= field.name %>->nodes[index]);
+            }
+            pm_buffer_append_byte(buffer, ']');
+            <%- when Prism::Template::StringField -%>
+            const pm_string_t *<%= field.name %> = &cast-><%= field.name %>;
+            pm_buffer_append_byte(buffer, '"');
+            pm_buffer_append_source(buffer, pm_string_source(<%= field.name %>), pm_string_length(<%= field.name %>), PM_BUFFER_ESCAPING_JSON);
+            pm_buffer_append_byte(buffer, '"');
+            <%- when Prism::Template::ConstantField -%>
+            pm_dump_json_constant(buffer, parser, cast-><%= field.name %>);
+            <%- when Prism::Template::OptionalConstantField -%>
+            if (cast-><%= field.name %> != PM_CONSTANT_ID_UNSET) {
+                pm_dump_json_constant(buffer, parser, cast-><%= field.name %>);
+            } else {
+                pm_buffer_append_string(buffer, "null", 4);
+            }
+            <%- when Prism::Template::ConstantListField -%>
+            const pm_constant_id_list_t *<%= field.name %> = &cast-><%= field.name %>;
+            pm_buffer_append_byte(buffer, '[');
+
+            for (size_t index = 0; index < <%= field.name %>->size; index++) {
+                if (index != 0) pm_buffer_append_byte(buffer, ',');
+                pm_dump_json_constant(buffer, parser, <%= field.name %>->ids[index]);
+            }
+            pm_buffer_append_byte(buffer, ']');
+            <%- when Prism::Template::LocationField -%>
+            pm_dump_json_location(buffer, &cast-><%= field.name %>);
+            <%- when Prism::Template::OptionalLocationField -%>
+            if (cast-><%= field.name %>.length != 0) {
+                pm_dump_json_location(buffer, &cast-><%= field.name %>);
+            } else {
+                pm_buffer_append_string(buffer, "null", 4);
+            }
+            <%- when Prism::Template::UInt8Field -%>
+            pm_buffer_append_format(buffer, "%" PRIu8, cast-><%= field.name %>);
+            <%- when Prism::Template::UInt32Field -%>
+            pm_buffer_append_format(buffer, "%" PRIu32, cast-><%= field.name %>);
+            <%- when Prism::Template::Flags -%>
+            size_t flags = 0;
+            pm_buffer_append_byte(buffer, '[');
+            <%- node.flags.values.each_with_index do |value, index| -%>
+            if (PM_NODE_FLAG_P(cast, PM_<%= node.flags.human.upcase %>_<%= value.name %>)) {
+                if (flags != 0) pm_buffer_append_byte(buffer, ',');
+                pm_buffer_append_string(buffer, "\"<%= value.name %>\"", <%= value.name.bytesize + 2 %>);
+                flags++;
+            }
+            <%- end -%>
+            pm_buffer_append_byte(buffer, ']');
+            <%- when Prism::Template::IntegerField -%>
+            pm_integer_string(buffer, &cast-><%= field.name %>);
+            <%- when Prism::Template::DoubleField -%>
+            pm_buffer_append_format(buffer, "%f", cast-><%= field.name %>);
+            <%- else -%>
+            <%- raise %>
+            <%- end -%>
+            <%- end -%>
+
+            pm_buffer_append_byte(buffer, '}');
+            break;
+        }
+        <%- end -%>
+        case PM_SCOPE_NODE:
+            break;
+    }
+}
+
+#endif
diff --git a/prism/templates/src/node.c.erb b/prism/templates/src/node.c.erb
index 2357e55200..f51aff6e53 100644
--- a/prism/templates/src/node.c.erb
+++ b/prism/templates/src/node.c.erb
@@ -1,153 +1,85 @@
 #line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>"
-#include "prism/node.h"
+#include "prism/internal/node.h"
+
+#include "prism/internal/arena.h"
+
+#include <stdlib.h>
 
 /**
  * Attempts to grow the node list to the next size. If there is already
- * capacity in the list, this function does nothing. Otherwise it reallocates
- * the list to be twice as large as it was before. If the reallocation fails,
- * this function returns false, otherwise it returns true.
+ * capacity in the list, this function does nothing. Otherwise it allocates a
+ * new array from the arena (abandon-and-copy strategy) and copies the existing
+ * data into it.
  */
-static bool
-pm_node_list_grow(pm_node_list_t *list, size_t size) {
+static void
+pm_node_list_grow(pm_arena_t *arena, pm_node_list_t *list, size_t size) {
     size_t requested_size = list->size + size;
 
-    // If the requested size caused overflow, return false.
-    if (requested_size < list->size) return false;
+    // Guard against overflow on the addition.
+    if (requested_size < list->size) abort();
 
-    // If the requested size is within the existing capacity, return true.
-    if (requested_size < list->capacity) return true;
+    // If the requested size is within the existing capacity, return.
+    if (requested_size <= list->capacity) return;
 
-    // Otherwise, reallocate the list to be twice as large as it was before.
+    // Otherwise, compute the next capacity by doubling.
     size_t next_capacity = list->capacity == 0 ? 4 : list->capacity * 2;
 
-    // If multiplying by 2 caused overflow, return false.
-    if (next_capacity < list->capacity) return false;
-
-    // If we didn't get enough by doubling, keep doubling until we do.
+    // Keep doubling until the request fits; abort if the capacity wrapped to 0.
     while (requested_size > next_capacity) {
-        size_t double_capacity = next_capacity * 2;
-
-        // Ensure we didn't overflow by multiplying by 2.
-        if (double_capacity < next_capacity) return false;
-        next_capacity = double_capacity;
+        if (next_capacity == 0) abort();
+        next_capacity *= 2;
     }
 
-    pm_node_t **nodes = (pm_node_t **) xrealloc(list->nodes, sizeof(pm_node_t *) * next_capacity);
-    if (nodes == NULL) return false;
+    // Allocate a new array from the arena (old array is abandoned).
+    pm_node_t **nodes = (pm_node_t **) pm_arena_alloc(arena, sizeof(pm_node_t *) * next_capacity, PRISM_ALIGNOF(pm_node_t *));
+
+    // Copy old data into the new array.
+    if (list->size > 0) {
+        memcpy(nodes, list->nodes, list->size * sizeof(pm_node_t *));
+    }
 
     list->nodes = nodes;
     list->capacity = next_capacity;
-    return true;
 }
 
 /**
- * Append a new node onto the end of the node list.
+ * Slow path for pm_node_list_append: grow the list and append the node.
+ * Do not call directly - use pm_node_list_append instead.
  */
 void
-pm_node_list_append(pm_node_list_t *list, pm_node_t *node) {
-    if (pm_node_list_grow(list, 1)) {
-        list->nodes[list->size++] = node;
-    }
+pm_node_list_append_slow(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) {
+    pm_node_list_grow(arena, list, 1);
+    list->nodes[list->size++] = node;
 }
 
 /**
  * Prepend a new node onto the beginning of the node list.
  */
 void
-pm_node_list_prepend(pm_node_list_t *list, pm_node_t *node) {
-    if (pm_node_list_grow(list, 1)) {
-        memmove(list->nodes + 1, list->nodes, list->size * sizeof(pm_node_t *));
-        list->nodes[0] = node;
-        list->size++;
-    }
+pm_node_list_prepend(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) {
+    pm_node_list_grow(arena, list, 1);
+    memmove(list->nodes + 1, list->nodes, list->size * sizeof(pm_node_t *));
+    list->nodes[0] = node;
+    list->size++;
 }
 
 /**
  * Concatenate the given node list onto the end of the other node list.
  */
 void
-pm_node_list_concat(pm_node_list_t *list, pm_node_list_t *other) {
-    if (other->size > 0 && pm_node_list_grow(list, other->size)) {
+pm_node_list_concat(pm_arena_t *arena, pm_node_list_t *list, pm_node_list_t *other) {
+    if (other->size > 0) {
+        pm_node_list_grow(arena, list, other->size);
         memcpy(list->nodes + list->size, other->nodes, other->size * sizeof(pm_node_t *));
         list->size += other->size;
     }
 }
 
 /**
- * Free the internal memory associated with the given node list.
- */
-void
-pm_node_list_free(pm_node_list_t *list) {
-    if (list->capacity > 0) {
-        xfree(list->nodes);
-        *list = (pm_node_list_t) { 0 };
-    }
-}
-
-PRISM_EXPORTED_FUNCTION void
-pm_node_destroy(pm_parser_t *parser, pm_node_t *node);
-
-/**
- * Destroy the nodes that are contained within the given node list.
- */
-static void
-pm_node_list_destroy(pm_parser_t *parser, pm_node_list_t *list) {
-    pm_node_t *node;
-    PM_NODE_LIST_FOREACH(list, index, node) pm_node_destroy(parser, node);
-    pm_node_list_free(list);
-}
-
-/**
- * Deallocate the space for a pm_node_t. Similarly to pm_node_alloc, we're not
- * using the parser argument, but it's there to allow for the future possibility
- * of pre-allocating larger memory pools.
- */
-PRISM_EXPORTED_FUNCTION void
-pm_node_destroy(pm_parser_t *parser, pm_node_t *node) {
-    switch (PM_NODE_TYPE(node)) {
-        <%- nodes.each do |node| -%>
-#line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>"
-        case <%= node.type %>: {
-            <%- if node.fields.any? { |field| ![Prism::Template::LocationField, Prism::Template::OptionalLocationField, Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::ConstantField, Prism::Template::OptionalConstantField, Prism::Template::DoubleField].include?(field.class) } -%>
-            pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
-            <%- end -%>
-            <%- node.fields.each do |field| -%>
-            <%- case field -%>
-            <%- when Prism::Template::LocationField, Prism::Template::OptionalLocationField, Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::ConstantField, Prism::Template::OptionalConstantField, Prism::Template::DoubleField -%>
-            <%- when Prism::Template::NodeField -%>
-            pm_node_destroy(parser, (pm_node_t *)cast-><%= field.name %>);
-            <%- when Prism::Template::OptionalNodeField -%>
-            if (cast-><%= field.name %> != NULL) {
-                pm_node_destroy(parser, (pm_node_t *)cast-><%= field.name %>);
-            }
-            <%- when Prism::Template::StringField -%>
-            pm_string_free(&cast-><%= field.name %>);
-            <%- when Prism::Template::NodeListField -%>
-            pm_node_list_destroy(parser, &cast-><%= field.name %>);
-            <%- when Prism::Template::ConstantListField -%>
-            pm_constant_id_list_free(&cast-><%= field.name %>);
-            <%- when Prism::Template::IntegerField -%>
-            pm_integer_free(&cast-><%= field.name %>);
-            <%- else -%>
-            <%- raise -%>
-            <%- end -%>
-            <%- end -%>
-            break;
-        }
-        <%- end -%>
-#line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>"
-        default:
-            assert(false && "unreachable");
-            break;
-    }
-    xfree(node);
-}
-
-/**
  * Returns a string representation of the given node type.
  */
-PRISM_EXPORTED_FUNCTION const char *
-pm_node_type_to_str(pm_node_type_t node_type)
+const char *
+pm_node_type(pm_node_type_t node_type)
 {
     switch (node_type) {
 <%- nodes.each do |node| -%>
@@ -166,7 +98,7 @@ pm_node_type_to_str(pm_node_type_t node_type)
  * pointer and is passed to the visitor callback for consumers to use as they
  * see fit.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) {
     if (visitor(node, data)) pm_visit_child_nodes(node, visitor, data);
 }
@@ -176,7 +108,7 @@ pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void
  * default behavior for walking the tree that is called from pm_visit_node if
  * the callback returns true.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) {
     switch (PM_NODE_TYPE(node)) {
         <%- nodes.each do |node| -%>
@@ -212,122 +144,23 @@ pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *nod
             break;
     }
 }
+<%- nodes.each do |node| -%>
 
-// We optionally support dumping to JSON. For systems that don't want or need
-// this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define.
-#ifndef PRISM_EXCLUDE_JSON
-
-static void
-pm_dump_json_constant(pm_buffer_t *buffer, const pm_parser_t *parser, pm_constant_id_t constant_id) {
-    const pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id);
-    pm_buffer_append_byte(buffer, '"');
-    pm_buffer_append_source(buffer, constant->start, constant->length, PM_BUFFER_ESCAPING_JSON);
-    pm_buffer_append_byte(buffer, '"');
-}
-
-static void
-pm_dump_json_location(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_location_t *location) {
-    uint32_t start = (uint32_t) (location->start - parser->start);
-    uint32_t end = (uint32_t) (location->end - parser->start);
-    pm_buffer_append_format(buffer, "{\"start\":%" PRIu32 ",\"end\":%" PRIu32 "}", start, end);
-}
-
+<%- params = node.fields.map(&:c_param) -%>
 /**
- * Dump JSON to the given buffer.
+ * Allocate and initialize a new <%= node.name %> node.
  */
-PRISM_EXPORTED_FUNCTION void
-pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) {
-    switch (PM_NODE_TYPE(node)) {
-        <%- nodes.each do |node| -%>
-        case <%= node.type %>: {
-            pm_buffer_append_string(buffer, "{\"type\":\"<%= node.name %>\",\"location\":", <%= node.name.bytesize + 22 %>);
-
-            const pm_<%= node.human %>_t *cast = (const pm_<%= node.human %>_t *) node;
-            pm_dump_json_location(buffer, parser, &cast->base.location);
-            <%- [*node.flags, *node.fields].each_with_index do |field, index| -%>
-
-            // Dump the <%= field.name %> field
-            pm_buffer_append_byte(buffer, ',');
-            pm_buffer_append_string(buffer, "\"<%= field.name %>\":", <%= field.name.bytesize + 3 %>);
-            <%- case field -%>
-            <%- when Prism::Template::NodeField -%>
-            pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
-            <%- when Prism::Template::OptionalNodeField -%>
-            if (cast-><%= field.name %> != NULL) {
-                pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
-            } else {
-                pm_buffer_append_string(buffer, "null", 4);
-            }
-            <%- when Prism::Template::NodeListField -%>
-            const pm_node_list_t *<%= field.name %> = &cast-><%= field.name %>;
-            pm_buffer_append_byte(buffer, '[');
-
-            for (size_t index = 0; index < <%= field.name %>->size; index++) {
-                if (index != 0) pm_buffer_append_byte(buffer, ',');
-                pm_dump_json(buffer, parser, <%= field.name %>->nodes[index]);
-            }
-            pm_buffer_append_byte(buffer, ']');
-            <%- when Prism::Template::StringField -%>
-            const pm_string_t *<%= field.name %> = &cast-><%= field.name %>;
-            pm_buffer_append_byte(buffer, '"');
-            pm_buffer_append_source(buffer, pm_string_source(<%= field.name %>), pm_string_length(<%= field.name %>), PM_BUFFER_ESCAPING_JSON);
-            pm_buffer_append_byte(buffer, '"');
-            <%- when Prism::Template::ConstantField -%>
-            pm_dump_json_constant(buffer, parser, cast-><%= field.name %>);
-            <%- when Prism::Template::OptionalConstantField -%>
-            if (cast-><%= field.name %> != PM_CONSTANT_ID_UNSET) {
-                pm_dump_json_constant(buffer, parser, cast-><%= field.name %>);
-            } else {
-                pm_buffer_append_string(buffer, "null", 4);
-            }
-            <%- when Prism::Template::ConstantListField -%>
-            const pm_constant_id_list_t *<%= field.name %> = &cast-><%= field.name %>;
-            pm_buffer_append_byte(buffer, '[');
-
-            for (size_t index = 0; index < <%= field.name %>->size; index++) {
-                if (index != 0) pm_buffer_append_byte(buffer, ',');
-                pm_dump_json_constant(buffer, parser, <%= field.name %>->ids[index]);
-            }
-            pm_buffer_append_byte(buffer, ']');
-            <%- when Prism::Template::LocationField -%>
-            pm_dump_json_location(buffer, parser, &cast-><%= field.name %>);
-            <%- when Prism::Template::OptionalLocationField -%>
-            if (cast-><%= field.name %>.start != NULL) {
-                pm_dump_json_location(buffer, parser, &cast-><%= field.name %>);
-            } else {
-                pm_buffer_append_string(buffer, "null", 4);
-            }
-            <%- when Prism::Template::UInt8Field -%>
-            pm_buffer_append_format(buffer, "%" PRIu8, cast-><%= field.name %>);
-            <%- when Prism::Template::UInt32Field -%>
-            pm_buffer_append_format(buffer, "%" PRIu32, cast-><%= field.name %>);
-            <%- when Prism::Template::Flags -%>
-            size_t flags = 0;
-            pm_buffer_append_byte(buffer, '[');
-            <%- node.flags.values.each_with_index do |value, index| -%>
-            if (PM_NODE_FLAG_P(cast, PM_<%= node.flags.human.upcase %>_<%= value.name %>)) {
-                if (flags != 0) pm_buffer_append_byte(buffer, ',');
-                pm_buffer_append_string(buffer, "\"<%= value.name %>\"", <%= value.name.bytesize + 2 %>);
-                flags++;
-            }
-            <%- end -%>
-            pm_buffer_append_byte(buffer, ']');
-            <%- when Prism::Template::IntegerField -%>
-            pm_integer_string(buffer, &cast-><%= field.name %>);
-            <%- when Prism::Template::DoubleField -%>
-            pm_buffer_append_format(buffer, "%f", cast-><%= field.name %>);
-            <%- else -%>
-            <%- raise %>
-            <%- end -%>
-            <%- end -%>
+pm_<%= node.human %>_t *
+pm_<%= node.human %>_new(pm_arena_t *arena, uint32_t node_id, pm_node_flags_t flags, pm_location_t location<%= params.empty? ? "" : ", #{params.join(", ")}" %>) {
+    pm_<%= node.human %>_t *node = (pm_<%= node.human %>_t *) pm_arena_alloc(arena, sizeof(pm_<%= node.human %>_t), PRISM_ALIGNOF(pm_<%= node.human %>_t));
+
+    *node = (pm_<%= node.human %>_t) {
+        .base = { .type = <%= node.type %>, .flags = flags, .node_id = node_id, .location = location }<%= node.fields.empty? ? "" : "," %>
+<%- node.fields.each_with_index do |field, index| -%>
+        .<%= field.name %> = <%= field.name %><%= index < node.fields.size - 1 ? "," : "" %>
+<%- end -%>
+    };
 
-            pm_buffer_append_byte(buffer, '}');
-            break;
-        }
-        <%- end -%>
-        case PM_SCOPE_NODE:
-            break;
-    }
+    return node;
 }
-
-#endif
+<%- end -%>
diff --git a/prism/templates/src/prettyprint.c.erb b/prism/templates/src/prettyprint.c.erb
index 639c2fecf3..f12531d934 100644
--- a/prism/templates/src/prettyprint.c.erb
+++ b/prism/templates/src/prettyprint.c.erb
@@ -1,23 +1,34 @@
 <%# encoding: ASCII -%>
 #include "prism/prettyprint.h"
 
-// We optionally support pretty printing nodes. For systems that don't want or
-// need this functionality, it can be turned off with the
-// PRISM_EXCLUDE_PRETTYPRINT define.
+/* We optionally support pretty printing nodes. For systems that don't want or
+ * need this functionality, it can be turned off with the
+ * PRISM_EXCLUDE_PRETTYPRINT define. */
 #ifdef PRISM_EXCLUDE_PRETTYPRINT
 
-void pm_prettyprint(void) {}
+/* Ensure this translation unit is never empty, even when prettyprint is
+ * excluded. */
+typedef int pm_prettyprint_unused_t;
 
 #else
 
-static inline void
+#include "prism/compiler/inline.h"
+#include "prism/internal/buffer.h"
+#include "prism/internal/constant_pool.h"
+#include "prism/internal/integer.h"
+#include "prism/internal/parser.h"
+#include "prism/line_offset_list.h"
+
+#include <inttypes.h>
+
+static PRISM_INLINE void
 prettyprint_location(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_location_t *location) {
-    pm_line_column_t start = pm_newline_list_line_column(&parser->newline_list, location->start, parser->start_line);
-    pm_line_column_t end = pm_newline_list_line_column(&parser->newline_list, location->end, parser->start_line);
+    pm_line_column_t start = pm_line_offset_list_line_column(&parser->line_offsets, location->start, parser->start_line);
+    pm_line_column_t end = pm_line_offset_list_line_column(&parser->line_offsets, location->start + location->length, parser->start_line);
     pm_buffer_append_format(output_buffer, "(%" PRIi32 ",%" PRIu32 ")-(%" PRIi32 ",%" PRIu32 ")", start.line, start.column, end.line, end.column);
 }
 
-static inline void
+static PRISM_INLINE void
 prettyprint_constant(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_constant_id_t constant_id) {
     pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id);
     pm_buffer_append_format(output_buffer, ":%.*s", (int) constant->length, constant->start);
@@ -106,17 +117,17 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm
                 pm_buffer_append_byte(output_buffer, ' ');
                 prettyprint_location(output_buffer, parser, location);
                 pm_buffer_append_string(output_buffer, " = \"", 4);
-                pm_buffer_append_source(output_buffer, location->start, (size_t) (location->end - location->start), PM_BUFFER_ESCAPING_RUBY);
+                pm_buffer_append_source(output_buffer, parser->start + location->start, (size_t) location->length, PM_BUFFER_ESCAPING_RUBY);
                 pm_buffer_append_string(output_buffer, "\"\n", 2);
             <%- when Prism::Template::OptionalLocationField -%>
                 pm_location_t *location = &cast-><%= field.name %>;
-                if (location->start == NULL) {
+                if (location->length == 0) {
                     pm_buffer_append_string(output_buffer, " nil\n", 5);
                 } else {
                     pm_buffer_append_byte(output_buffer, ' ');
                     prettyprint_location(output_buffer, parser, location);
                     pm_buffer_append_string(output_buffer, " = \"", 4);
-                    pm_buffer_append_source(output_buffer, location->start, (size_t) (location->end - location->start), PM_BUFFER_ESCAPING_RUBY);
+                    pm_buffer_append_source(output_buffer, parser->start + location->start, (size_t) location->length, PM_BUFFER_ESCAPING_RUBY);
                     pm_buffer_append_string(output_buffer, "\"\n", 2);
                 }
             <%- when Prism::Template::UInt8Field -%>
@@ -156,11 +167,11 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm
 /**
  * Pretty-prints the AST represented by the given node to the given buffer.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node) {
     pm_buffer_t prefix_buffer = { 0 };
     prettyprint_node(output_buffer, parser, node, &prefix_buffer);
-    pm_buffer_free(&prefix_buffer);
+    pm_buffer_cleanup(&prefix_buffer);
 }
 
 #endif
diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb
index 3e15a11039..3d9811e5db 100644
--- a/prism/templates/src/serialize.c.erb
+++ b/prism/templates/src/serialize.c.erb
@@ -1,57 +1,58 @@
-#include "prism.h"
+#include "prism/excludes.h"
+
+/* We optionally support serializing to a binary string. For systems that do not
+ * want or need this functionality, it can be turned off with the
+ * PRISM_EXCLUDE_SERIALIZATION define. */
+#ifdef PRISM_EXCLUDE_SERIALIZATION
+
+/* Ensure this translation unit is never empty, even when serialization is
+ * excluded. */
+typedef int pm_serialize_unused_t;
+
+#else
+
+#include "prism/compiler/inline.h"
 
-// We optionally support serializing to a binary string. For systems that don't
-// want or need this functionality, it can be turned off with the
-// PRISM_EXCLUDE_SERIALIZATION define.
-#ifndef PRISM_EXCLUDE_SERIALIZATION
+#include "prism/internal/buffer.h"
+#include "prism/internal/comments.h"
+#include "prism/internal/diagnostic.h"
+#include "prism/internal/encoding.h"
+#include "prism/internal/list.h"
+#include "prism/internal/magic_comments.h"
+#include "prism/internal/options.h"
+#include "prism/internal/parser.h"
 
+#include "prism.h"
+#include "prism/ast.h"
+#include "prism/line_offset_list.h"
+
+#include <assert.h>
 #include <stdio.h>
+#include <string.h>
 
-static inline uint32_t
+static PRISM_INLINE uint32_t
 pm_ptrdifft_to_u32(ptrdiff_t value) {
     assert(value >= 0 && ((unsigned long) value) < UINT32_MAX);
     return (uint32_t) value;
 }
 
-static inline uint32_t
+static PRISM_INLINE uint32_t
 pm_sizet_to_u32(size_t value) {
     assert(value < UINT32_MAX);
     return (uint32_t) value;
 }
 
 static void
-pm_serialize_location(const pm_parser_t *parser, const pm_location_t *location, pm_buffer_t *buffer) {
-    assert(location->start);
-    assert(location->end);
-    assert(location->start <= location->end);
-
-    pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(location->start - parser->start));
-    pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(location->end - location->start));
+pm_serialize_location(const pm_location_t *location, pm_buffer_t *buffer) {
+    pm_buffer_append_varuint(buffer, location->start);
+    pm_buffer_append_varuint(buffer, location->length);
 }
 
 static void
-pm_serialize_string(const pm_parser_t *parser, const pm_string_t *string, pm_buffer_t *buffer) {
-    switch (string->type) {
-        case PM_STRING_SHARED: {
-            pm_buffer_append_byte(buffer, 1);
-            pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(pm_string_source(string) - parser->start));
-            pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_string_length(string)));
-            break;
-        }
-        case PM_STRING_OWNED:
-        case PM_STRING_CONSTANT: {
-            uint32_t length = pm_sizet_to_u32(pm_string_length(string));
-            pm_buffer_append_byte(buffer, 2);
-            pm_buffer_append_varuint(buffer, length);
-            pm_buffer_append_bytes(buffer, pm_string_source(string), length);
-            break;
-        }
-#ifdef PRISM_HAS_MMAP
-        case PM_STRING_MAPPED:
-            assert(false && "Cannot serialize mapped strings.");
-            break;
-#endif
-    }
+pm_serialize_string(const pm_string_t *string, pm_buffer_t *buffer) {
+    uint32_t length = pm_sizet_to_u32(pm_string_length(string));
+    pm_buffer_append_varuint(buffer, length);
+    pm_buffer_append_bytes(buffer, pm_string_source(string), length);
 }
 
 static void
@@ -72,12 +73,10 @@ static void
 pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
     pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node));
 
-    size_t offset = buffer->length;
-
     <%- if Prism::Template::INCLUDE_NODE_ID -%>
     pm_buffer_append_varuint(buffer, node->node_id);
     <%- end -%>
-    pm_serialize_location(parser, &node->location, buffer);
+    pm_serialize_location(&node->location, buffer);
 
     switch (PM_NODE_TYPE(node)) {
         // We do not need to serialize a ScopeNode ever as
@@ -106,7 +105,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
                 pm_serialize_node(parser, (pm_node_t *)((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
             }
             <%- when Prism::Template::StringField -%>
-            pm_serialize_string(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+            pm_serialize_string(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
             <%- when Prism::Template::NodeListField -%>
             uint32_t <%= field.name %>_size = pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>.size);
             pm_buffer_append_varuint(buffer, <%= field.name %>_size);
@@ -123,15 +122,15 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
             }
             <%- when Prism::Template::LocationField -%>
             <%- if field.should_be_serialized? -%>
-            pm_serialize_location(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+            pm_serialize_location(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
             <%- end -%>
             <%- when Prism::Template::OptionalLocationField -%>
             <%- if field.should_be_serialized? -%>
-            if (((pm_<%= node.human %>_t *)node)-><%= field.name %>.start == NULL) {
+            if (((pm_<%= node.human %>_t *)node)-><%= field.name %>.length == 0) {
                 pm_buffer_append_byte(buffer, 0);
             } else {
                 pm_buffer_append_byte(buffer, 1);
-                pm_serialize_location(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+                pm_serialize_location(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
             }
             <%- end -%>
             <%- when Prism::Template::UInt8Field -%>
@@ -148,7 +147,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
             <%- end -%>
             <%- if node.needs_serialized_length? -%>
             // serialize length
-            uint32_t length = pm_sizet_to_u32(buffer->length - offset - sizeof(uint32_t));
+            uint32_t length = pm_sizet_to_u32(buffer->length - length_offset);
             memcpy(buffer->value + length_offset, &length, sizeof(uint32_t));
             <%- end -%>
             break;
@@ -158,7 +157,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
 }
 
 static void
-pm_serialize_newline_list(pm_newline_list_t *list, pm_buffer_t *buffer) {
+pm_serialize_line_offset_list(pm_line_offset_list_t *list, pm_buffer_t *buffer) {
     uint32_t size = pm_sizet_to_u32(list->size);
     pm_buffer_append_varuint(buffer, size);
 
@@ -169,60 +168,60 @@ pm_serialize_newline_list(pm_newline_list_t *list, pm_buffer_t *buffer) {
 }
 
 static void
-pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *buffer) {
+pm_serialize_comment(pm_comment_t *comment, pm_buffer_t *buffer) {
     // serialize type
     pm_buffer_append_byte(buffer, (uint8_t) comment->type);
 
     // serialize location
-    pm_serialize_location(parser, &comment->location, buffer);
+    pm_serialize_location(&comment->location, buffer);
 }
 
 /**
  * Serialize the given list of comments to the given buffer.
  */
 void
-pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
+pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer) {
     pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_list_size(list)));
 
     pm_comment_t *comment;
     for (comment = (pm_comment_t *) list->head; comment != NULL; comment = (pm_comment_t *) comment->node.next) {
-        pm_serialize_comment(parser, comment, buffer);
+        pm_serialize_comment(comment, buffer);
     }
 }
 
 static void
-pm_serialize_magic_comment(pm_parser_t *parser, pm_magic_comment_t *magic_comment, pm_buffer_t *buffer) {
+pm_serialize_magic_comment(pm_magic_comment_t *magic_comment, pm_buffer_t *buffer) {
     // serialize key location
-    pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(magic_comment->key_start - parser->start));
-    pm_buffer_append_varuint(buffer, pm_sizet_to_u32(magic_comment->key_length));
+    pm_buffer_append_varuint(buffer, magic_comment->key.start);
+    pm_buffer_append_varuint(buffer, magic_comment->key.length);
 
     // serialize value location
-    pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(magic_comment->value_start - parser->start));
-    pm_buffer_append_varuint(buffer, pm_sizet_to_u32(magic_comment->value_length));
+    pm_buffer_append_varuint(buffer, magic_comment->value.start);
+    pm_buffer_append_varuint(buffer, magic_comment->value.length);
 }
 
 static void
-pm_serialize_magic_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
+pm_serialize_magic_comment_list(pm_list_t *list, pm_buffer_t *buffer) {
     pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_list_size(list)));
 
     pm_magic_comment_t *magic_comment;
     for (magic_comment = (pm_magic_comment_t *) list->head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
-        pm_serialize_magic_comment(parser, magic_comment, buffer);
+        pm_serialize_magic_comment(magic_comment, buffer);
     }
 }
 
 static void
 pm_serialize_data_loc(const pm_parser_t *parser, pm_buffer_t *buffer) {
-    if (parser->data_loc.end == NULL) {
+    if (parser->data_loc.length == 0) {
         pm_buffer_append_byte(buffer, 0);
     } else {
         pm_buffer_append_byte(buffer, 1);
-        pm_serialize_location(parser, &parser->data_loc, buffer);
+        pm_serialize_location(&parser->data_loc, buffer);
     }
 }
 
 static void
-pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) {
+pm_serialize_diagnostic(pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) {
     // serialize the type
     pm_buffer_append_varuint(buffer, (uint32_t) diagnostic->diag_id);
 
@@ -232,18 +231,18 @@ pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buf
     pm_buffer_append_string(buffer, diagnostic->message, message_length);
 
     // serialize location
-    pm_serialize_location(parser, &diagnostic->location, buffer);
+    pm_serialize_location(&diagnostic->location, buffer);
 
     pm_buffer_append_byte(buffer, diagnostic->level);
 }
 
 static void
-pm_serialize_diagnostic_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
+pm_serialize_diagnostic_list(pm_list_t *list, pm_buffer_t *buffer) {
     pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_list_size(list)));
 
     pm_diagnostic_t *diagnostic;
     for (diagnostic = (pm_diagnostic_t *) list->head; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
-        pm_serialize_diagnostic(parser, diagnostic, buffer);
+        pm_serialize_diagnostic(diagnostic, buffer);
     }
 }
 
@@ -261,14 +260,15 @@ static void
 pm_serialize_metadata(pm_parser_t *parser, pm_buffer_t *buffer) {
     pm_serialize_encoding(parser->encoding, buffer);
     pm_buffer_append_varsint(buffer, parser->start_line);
-    pm_serialize_newline_list(&parser->newline_list, buffer);
+    pm_serialize_line_offset_list(&parser->line_offsets, buffer);
 <%- unless Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS -%>
-    pm_serialize_comment_list(parser, &parser->comment_list, buffer);
+    pm_serialize_comment_list(&parser->comment_list, buffer);
 <%- end -%>
-    pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
+    pm_serialize_magic_comment_list(&parser->magic_comment_list, buffer);
     pm_serialize_data_loc(parser, buffer);
-    pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
-    pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
+    pm_serialize_diagnostic_list(&parser->error_list, buffer);
+    pm_serialize_diagnostic_list(&parser->warning_list, buffer);
+    pm_buffer_append_byte(buffer, (uint8_t) parser->continuable);
 }
 
 #line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>"
@@ -308,28 +308,12 @@ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
             pm_constant_t *constant = &parser->constant_pool.constants[bucket->id - 1];
             size_t buffer_offset = offset + ((((size_t)bucket->id) - 1) * 8);
 
-            if (bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED || bucket->type == PM_CONSTANT_POOL_BUCKET_CONSTANT) {
-                // Since this is an owned or constant constant, we are going to
-                // write its contents into the buffer after the constant pool.
-                // So effectively in place of the source offset, we have a
-                // buffer offset. We will add a leading 1 to indicate that this
-                // is a buffer offset.
-                uint32_t content_offset = pm_sizet_to_u32(buffer->length);
-                uint32_t owned_mask = (uint32_t) (1 << 31);
+            // Write the constant contents into the buffer after the constant
+            // pool. In place of the source offset, we store a buffer offset.
+            uint32_t content_offset = pm_sizet_to_u32(buffer->length);
+            memcpy(buffer->value + buffer_offset, &content_offset, 4);
+            pm_buffer_append_bytes(buffer, constant->start, constant->length);
 
-                assert(content_offset < owned_mask);
-                content_offset |= owned_mask;
-
-                memcpy(buffer->value + buffer_offset, &content_offset, 4);
-                pm_buffer_append_bytes(buffer, constant->start, constant->length);
-            } else {
-                // Since this is a shared constant, we are going to write its
-                // source offset directly into the buffer.
-                uint32_t source_offset = pm_ptrdifft_to_u32(constant->start - parser->start);
-                memcpy(buffer->value + buffer_offset, &source_offset, 4);
-            }
-
-            // Now we can write the length of the constant into the buffer.
             uint32_t constant_length = pm_sizet_to_u32(constant->length);
             memcpy(buffer->value + buffer_offset + 4, &constant_length, 4);
         }
@@ -337,7 +321,7 @@ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
 }
 
 static void
-serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) {
+serialize_token(pm_parser_t *parser, pm_token_t *token, void *data) {
     pm_buffer_t *buffer = (pm_buffer_t *) data;
 
     pm_buffer_append_varuint(buffer, token->type);
@@ -349,58 +333,72 @@ serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) {
 /**
  * Lex the given source and serialize to the given buffer.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
     pm_options_t options = { 0 };
     pm_options_read(&options, data);
 
+    pm_arena_t arena = { 0 };
     pm_parser_t parser;
-    pm_parser_init(&parser, source, size, &options);
+    pm_parser_init(&arena, &parser, source, size, &options);
 
-    pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
-        .data = (void *) buffer,
-        .callback = serialize_token,
-    };
-
-    parser.lex_callback = &lex_callback;
-    pm_node_t *node = pm_parse(&parser);
+    pm_parser_lex_callback_set(&parser, serialize_token, buffer);
+    pm_parse(&parser);
 
     // Append 0 to mark end of tokens.
     pm_buffer_append_byte(buffer, 0);
 
     pm_serialize_metadata(&parser, buffer);
 
-    pm_node_destroy(&parser, node);
-    pm_parser_free(&parser);
-    pm_options_free(&options);
+    pm_parser_cleanup(&parser);
+    pm_arena_cleanup(&arena);
+    pm_options_cleanup(&options);
 }
 
 /**
  * Parse and serialize both the AST and the tokens represented by the given
  * source to the given buffer.
  */
-PRISM_EXPORTED_FUNCTION void
+void
 pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
     pm_options_t options = { 0 };
     pm_options_read(&options, data);
 
+    pm_arena_t arena = { 0 };
     pm_parser_t parser;
-    pm_parser_init(&parser, source, size, &options);
-
-    pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
-        .data = (void *) buffer,
-        .callback = serialize_token,
-    };
+    pm_parser_init(&arena, &parser, source, size, &options);
 
-    parser.lex_callback = &lex_callback;
+    pm_parser_lex_callback_set(&parser, serialize_token, buffer);
     pm_node_t *node = pm_parse(&parser);
 
     pm_buffer_append_byte(buffer, 0);
     pm_serialize(&parser, node, buffer);
 
-    pm_node_destroy(&parser, node);
-    pm_parser_free(&parser);
-    pm_options_free(&options);
+    pm_parser_cleanup(&parser);
+    pm_arena_cleanup(&arena);
+    pm_options_cleanup(&options);
+}
+
+/**
+ * Parse the source and return true if it parses without errors.
+ */
+bool
+pm_serialize_parse_success_p(const uint8_t *source, size_t size, const char *data) {
+    pm_options_t options = { 0 };
+    pm_options_read(&options, data);
+
+    pm_arena_t arena = { 0 };
+    pm_parser_t parser;
+    pm_parser_init(&arena, &parser, source, size, &options);
+
+    pm_parse(&parser);
+
+    bool result = parser.error_list.size == 0;
+    pm_parser_cleanup(&parser);
+    pm_arena_cleanup(&arena);
+    pm_options_cleanup(&options);
+
+    return result;
 }
 
 #endif
diff --git a/prism/templates/src/token_type.c.erb b/prism/templates/src/tokens.c.erb
index f196393ee1..1e82954738 100644
--- a/prism/templates/src/token_type.c.erb
+++ b/prism/templates/src/tokens.c.erb
@@ -1,12 +1,12 @@
-#include <string.h>
-
 #include "prism/ast.h"
 
+#include <assert.h>
+
 /**
  * Returns a string representation of the given token type.
  */
-PRISM_EXPORTED_FUNCTION const char *
-pm_token_type_name(pm_token_type_t token_type) {
+const char *
+pm_token_type(pm_token_type_t token_type) {
     switch (token_type) {
 <%- tokens.each do |token| -%>
         case PM_TOKEN_<%= token.name %>:
@@ -27,14 +27,10 @@ pm_token_type_name(pm_token_type_t token_type) {
  * Returns the human name of the given token type.
  */
 const char *
-pm_token_type_human(pm_token_type_t token_type) {
+pm_token_str(pm_token_type_t token_type) {
     switch (token_type) {
         case PM_TOKEN_EOF:
             return "end-of-input";
-        case PM_TOKEN_MISSING:
-            return "missing token";
-        case PM_TOKEN_NOT_PROVIDED:
-            return "not provided token";
         case PM_TOKEN_AMPERSAND:
             return "'&'";
         case PM_TOKEN_AMPERSAND_AMPERSAND:
@@ -171,6 +167,8 @@ pm_token_type_human(pm_token_type_t token_type) {
             return "'defined?'";
         case PM_TOKEN_KEYWORD_DO:
             return "'do'";
+        case PM_TOKEN_KEYWORD_DO_BLOCK:
+            return "'do'";
         case PM_TOKEN_KEYWORD_DO_LOOP:
             return "'do'";
         case PM_TOKEN_KEYWORD_ELSE:
@@ -362,8 +360,8 @@ pm_token_type_human(pm_token_type_t token_type) {
             return "";
     }
 
-    // Provide a default, because some compilers can't determine that the above
-    // switch is exhaustive.
+    /* Provide a default, because some compilers cannot determine that the above
+     * switch is exhaustive. */
     assert(false && "unreachable");
     return "";
 }
diff --git a/prism/templates/template.rb b/prism/templates/template.rb
index 30cb60cabd..0fdeda561f 100755
--- a/prism/templates/template.rb
+++ b/prism/templates/template.rb
@@ -6,13 +6,12 @@ require "fileutils"
 require "yaml"
 
 module Prism
-  module Template
+  module Template # :nodoc: all
     SERIALIZE_ONLY_SEMANTICS_FIELDS = ENV.fetch("PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS", false)
-    REMOVE_ON_ERROR_TYPES = SERIALIZE_ONLY_SEMANTICS_FIELDS
     CHECK_FIELD_KIND = ENV.fetch("CHECK_FIELD_KIND", false)
 
-    JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "truffleruby"
-    JAVA_STRING_TYPE = JAVA_BACKEND == "jruby" ? "org.jruby.RubySymbol" : "String"
+    JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "default"
+    JAVA_IDENTIFIER_TYPE = JAVA_BACKEND == "truffleruby" ? "String" : "byte[]"
     INCLUDE_NODE_ID = !SERIALIZE_ONLY_SEMANTICS_FIELDS || JAVA_BACKEND == "jruby"
 
     COMMON_FLAGS_COUNT = 2
@@ -49,6 +48,14 @@ module Prism
       end
     end
 
+    # This module contains methods for escaping characters in Doxygen comments.
+    module Doxygen
+      # Similar to \verbatim ... \endverbatim but doesn't wrap the result in a code block.
+      def self.verbatim(value)
+        value.gsub(/[*%!`#<>_+@-]/, '\\\\\0')
+      end
+    end
+
     # A comment attached to a field or node.
     class ConfigComment
       attr_reader :value
@@ -97,6 +104,11 @@ module Prism
     # Some node fields can be specialized if they point to a specific kind of
     # node and not just a generic node.
     class NodeKindField < Field
+      # The C declaration (type and name) to use for this field as a function parameter.
+      def c_param
+        "struct #{c_type} *#{name}"
+      end
+
       def initialize(kind:, **options)
         @kind = kind
         super(**options)
@@ -142,27 +154,27 @@ module Prism
         if specific_kind
           specific_kind
         elsif union_kind
-          union_kind.join(" | ")
+          "(#{union_kind.join(" | ")})"
         else
           "Prism::node"
         end
       end
 
-      def rbi_class
+      def call_seq_type
         if specific_kind
-          "Prism::#{specific_kind}"
+          specific_kind
         elsif union_kind
-          "T.any(#{union_kind.map { |kind| "Prism::#{kind}" }.join(", ")})"
+          union_kind.join(" | ")
         else
-          "Prism::Node"
+          "Node"
         end
       end
 
       def check_field_kind
         if union_kind
-          "[#{union_kind.join(', ')}].include?(#{name}.class)"
+          "[#{union_kind.join(', ')}, ErrorRecoveryNode].include?(#{name}.class)"
         else
-          "#{name}.is_a?(#{ruby_type})"
+          "#{name}.is_a?(#{ruby_type}) || #{name}.is_a?(ErrorRecoveryNode)"
         end
       end
     end
@@ -174,27 +186,27 @@ module Prism
         if specific_kind
           "#{specific_kind}?"
         elsif union_kind
-          [*union_kind, "nil"].join(" | ")
+          "(#{union_kind.join(" | ")})?"
         else
           "Prism::node?"
         end
       end
 
-      def rbi_class
+      def call_seq_type
         if specific_kind
-          "T.nilable(Prism::#{specific_kind})"
+          "#{specific_kind} | nil"
         elsif union_kind
-          "T.nilable(T.any(#{union_kind.map { |kind| "Prism::#{kind}" }.join(", ")}))"
+          [*union_kind, "nil"].join(" | ")
         else
-          "T.nilable(Prism::Node)"
+          "Node | nil"
         end
       end
 
       def check_field_kind
         if union_kind
-          "[#{union_kind.join(', ')}, NilClass].include?(#{name}.class)"
+          "[#{union_kind.join(', ')}, ErrorRecoveryNode, NilClass].include?(#{name}.class)"
         else
-          "#{name}.nil? || #{name}.is_a?(#{ruby_type})"
+          "#{name}.nil? || #{name}.is_a?(#{ruby_type}) || #{name}.is_a?(ErrorRecoveryNode)"
         end
       end
     end
@@ -202,23 +214,31 @@ module Prism
     # This represents a field on a node that is a list of nodes. We pass them as
     # references and store them directly on the struct.
     class NodeListField < NodeKindField
-      def rbs_class
+      def c_param
+        "pm_node_list_t #{name}"
+      end
+
+      def element_rbs_class
         if specific_kind
-          "Array[#{specific_kind}]"
+          "#{specific_kind}"
         elsif union_kind
-          "Array[#{union_kind.join(" | ")}]"
+          "#{union_kind.join(" | ")}"
         else
-          "Array[Prism::node]"
+          "Prism::node"
         end
       end
 
-      def rbi_class
+      def rbs_class
+        "Array[#{element_rbs_class}]"
+      end
+
+      def call_seq_type
         if specific_kind
-          "T::Array[Prism::#{specific_kind}]"
+          "Array[#{specific_kind}]"
         elsif union_kind
-          "T::Array[T.any(#{union_kind.map { |kind| "Prism::#{kind}" }.join(", ")})]"
+          "Array[#{union_kind.join(" | ")}]"
         else
-          "T::Array[Prism::Node]"
+          "Array[Node]"
         end
       end
 
@@ -228,9 +248,9 @@ module Prism
 
       def check_field_kind
         if union_kind
-          "#{name}.all? { |n| [#{union_kind.join(', ')}].include?(n.class) }"
+          "#{name}.all? { |n| [#{union_kind.join(', ')}, ErrorRecoveryNode].include?(n.class) }"
         else
-          "#{name}.all? { |n| n.is_a?(#{ruby_type}) }"
+          "#{name}.all? { |n| n.is_a?(#{ruby_type}) || n.is_a?(ErrorRecoveryNode) }"
         end
       end
     end
@@ -238,58 +258,74 @@ module Prism
     # This represents a field on a node that is the ID of a string interned
     # through the parser's constant pool.
     class ConstantField < Field
+      def c_param
+        "pm_constant_id_t #{name}"
+      end
+
       def rbs_class
         "Symbol"
       end
 
-      def rbi_class
+      def call_seq_type
         "Symbol"
       end
 
       def java_type
-        JAVA_STRING_TYPE
+        JAVA_IDENTIFIER_TYPE
       end
     end
 
     # This represents a field on a node that is the ID of a string interned
     # through the parser's constant pool and can be optionally null.
     class OptionalConstantField < Field
+      def c_param
+        "pm_constant_id_t #{name}"
+      end
+
       def rbs_class
         "Symbol?"
       end
 
-      def rbi_class
-        "T.nilable(Symbol)"
+      def call_seq_type
+        "Symbol | nil"
       end
 
       def java_type
-        JAVA_STRING_TYPE
+        JAVA_IDENTIFIER_TYPE
       end
     end
 
     # This represents a field on a node that is a list of IDs that are associated
     # with strings interned through the parser's constant pool.
     class ConstantListField < Field
+      def c_param
+        "pm_constant_id_list_t #{name}"
+      end
+
       def rbs_class
         "Array[Symbol]"
       end
 
-      def rbi_class
-        "T::Array[Symbol]"
+      def call_seq_type
+        "Array[Symbol]"
       end
 
       def java_type
-        "#{JAVA_STRING_TYPE}[]"
+        "#{JAVA_IDENTIFIER_TYPE}[]"
       end
     end
 
     # This represents a field on a node that is a string.
     class StringField < Field
+      def c_param
+        "pm_string_t #{name}"
+      end
+
       def rbs_class
         "String"
       end
 
-      def rbi_class
+      def call_seq_type
         "String"
       end
 
@@ -300,6 +336,10 @@ module Prism
 
     # This represents a field on a node that is a location.
     class LocationField < Field
+      def c_param
+        "pm_location_t #{name}"
+      end
+
       def semantic_field?
         false
       end
@@ -308,8 +348,8 @@ module Prism
         "Location"
       end
 
-      def rbi_class
-        "Prism::Location"
+      def call_seq_type
+        "Location"
       end
 
       def java_type
@@ -319,6 +359,10 @@ module Prism
 
     # This represents a field on a node that is a location that is optional.
     class OptionalLocationField < Field
+      def c_param
+        "pm_location_t #{name}"
+      end
+
       def semantic_field?
         false
       end
@@ -327,8 +371,8 @@ module Prism
         "Location?"
       end
 
-      def rbi_class
-        "T.nilable(Prism::Location)"
+      def call_seq_type
+        "Location | nil"
       end
 
       def java_type
@@ -338,11 +382,15 @@ module Prism
 
     # This represents an integer field.
     class UInt8Field < Field
+      def c_param
+        "uint8_t #{name}"
+      end
+
       def rbs_class
         "Integer"
       end
 
-      def rbi_class
+      def call_seq_type
         "Integer"
       end
 
@@ -353,11 +401,15 @@ module Prism
 
     # This represents an integer field.
     class UInt32Field < Field
+      def c_param
+        "uint32_t #{name}"
+      end
+
       def rbs_class
         "Integer"
       end
 
-      def rbi_class
+      def call_seq_type
         "Integer"
       end
 
@@ -369,11 +421,15 @@ module Prism
     # This represents an arbitrarily-sized integer. When it gets to Ruby it will
     # be an Integer.
     class IntegerField < Field
+      def c_param
+        "pm_integer_t #{name}"
+      end
+
       def rbs_class
         "Integer"
       end
 
-      def rbi_class
+      def call_seq_type
         "Integer"
       end
 
@@ -385,11 +441,15 @@ module Prism
     # This represents a double-precision floating point number. When it gets to
     # Ruby it will be a Float.
     class DoubleField < Field
+      def c_param
+        "double #{name}"
+      end
+
       def rbs_class
         "Float"
       end
 
-      def rbi_class
+      def call_seq_type
         "Float"
       end
 
@@ -432,9 +492,6 @@ module Prism
                 when "pattern expression"
                   # the list of all possible types is too long with 37+ different classes
                   "Node"
-                when Hash
-                  kind = kind.fetch("on error")
-                  REMOVE_ON_ERROR_TYPES ? nil : kind
                 else
                   kind
                 end
@@ -547,33 +604,17 @@ module Prism
         extension = File.extname(filepath.gsub(".erb", ""))
 
         heading =
-          case extension
-          when ".rb"
+          if extension == ".rb"
             <<~HEADING
             # frozen_string_literal: true
+            # :markup: markdown
 
             =begin
+            --
             This file is generated by the templates/template.rb script and should not be
             modified manually. See #{filepath}
             if you are looking to modify the template
-            =end
-
-            HEADING
-          when ".rbs"
-            <<~HEADING
-            # This file is generated by the templates/template.rb script and should not be
-            # modified manually. See #{filepath}
-            # if you are looking to modify the template
-
-            HEADING
-          when ".rbi"
-            <<~HEADING
-            # typed: strict
-
-            =begin
-            This file is generated by the templates/template.rb script and should not be
-            modified manually. See #{filepath}
-            if you are looking to modify the template
+            ++
             =end
 
             HEADING
@@ -582,7 +623,7 @@ module Prism
             /*----------------------------------------------------------------------------*/
             /* This file is generated by the templates/template.rb script and should not  */
             /* be modified manually. See                                                  */
-            /* #{filepath + " " * (74 - filepath.size) } */
+            /* #{filepath.ljust(74)} */
             /* if you are looking to modify the                                           */
             /* template                                                                   */
             /*----------------------------------------------------------------------------*/
@@ -602,8 +643,14 @@ module Prism
           end
         end
 
-        FileUtils.mkdir_p(File.dirname(write_to))
-        File.write(write_to, contents)
+        begin
+          FileUtils.mkdir_p(File.dirname(write_to))
+          File.write(write_to, contents)
+        rescue SystemCallError # EACCES, EPERM, EROFS, etc.
+          # Fall back to the current directory
+          FileUtils.mkdir_p(File.dirname(name))
+          File.write(name, contents)
+        end
       end
 
       private
@@ -639,13 +686,13 @@ module Prism
     TEMPLATES = [
       "ext/prism/api_node.c",
       "include/prism/ast.h",
-      "include/prism/diagnostic.h",
+      "include/prism/internal/diagnostic.h",
       "javascript/src/deserialize.js",
       "javascript/src/nodes.js",
       "javascript/src/visitor.js",
-      "java/org/prism/Loader.java",
-      "java/org/prism/Nodes.java",
-      "java/org/prism/AbstractNodeVisitor.java",
+      "java/api/src/main/java-templates/org/ruby_lang/prism/Loader.java",
+      "java/api/src/main/java-templates/org/ruby_lang/prism/Nodes.java",
+      "java/api/src/main/java-templates/org/ruby_lang/prism/AbstractNodeVisitor.java",
       "lib/prism/compiler.rb",
       "lib/prism/dispatcher.rb",
       "lib/prism/dot_visitor.rb",
@@ -657,19 +704,11 @@ module Prism
       "lib/prism/serialize.rb",
       "lib/prism/visitor.rb",
       "src/diagnostic.c",
+      "src/json.c",
       "src/node.c",
       "src/prettyprint.c",
       "src/serialize.c",
-      "src/token_type.c",
-      "rbi/prism/dsl.rbi",
-      "rbi/prism/node.rbi",
-      "rbi/prism/visitor.rbi",
-      "sig/prism.rbs",
-      "sig/prism/dsl.rbs",
-      "sig/prism/mutation_compiler.rbs",
-      "sig/prism/node.rbs",
-      "sig/prism/visitor.rbs",
-      "sig/prism/_private/dot_visitor.rbs"
+      "src/tokens.c"
     ]
   end
 end
diff --git a/prism/util/pm_buffer.h b/prism/util/pm_buffer.h
deleted file mode 100644
index f3c20ab2a5..0000000000
--- a/prism/util/pm_buffer.h
+++ /dev/null
@@ -1,228 +0,0 @@
-/**
- * @file pm_buffer.h
- *
- * A wrapper around a contiguous block of allocated memory.
- */
-#ifndef PRISM_BUFFER_H
-#define PRISM_BUFFER_H
-
-#include "prism/defines.h"
-#include "prism/util/pm_char.h"
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-
-/**
- * A pm_buffer_t is a simple memory buffer that stores data in a contiguous
- * block of memory.
- */
-typedef struct {
-    /** The length of the buffer in bytes. */
-    size_t length;
-
-    /** The capacity of the buffer in bytes that has been allocated. */
-    size_t capacity;
-
-    /** A pointer to the start of the buffer. */
-    char *value;
-} pm_buffer_t;
-
-/**
- * Return the size of the pm_buffer_t struct.
- *
- * @returns The size of the pm_buffer_t struct.
- */
-PRISM_EXPORTED_FUNCTION size_t pm_buffer_sizeof(void);
-
-/**
- * Initialize a pm_buffer_t with the given capacity.
- *
- * @param buffer The buffer to initialize.
- * @param capacity The capacity of the buffer.
- * @returns True if the buffer was initialized successfully, false otherwise.
- */
-bool pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity);
-
-/**
- * Initialize a pm_buffer_t with its default values.
- *
- * @param buffer The buffer to initialize.
- * @returns True if the buffer was initialized successfully, false otherwise.
- */
-PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer);
-
-/**
- * Return the value of the buffer.
- *
- * @param buffer The buffer to get the value of.
- * @returns The value of the buffer.
- */
-PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer);
-
-/**
- * Return the length of the buffer.
- *
- * @param buffer The buffer to get the length of.
- * @returns The length of the buffer.
- */
-PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(const pm_buffer_t *buffer);
-
-/**
- * Append the given amount of space as zeroes to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param length The amount of space to append and zero.
- */
-void pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length);
-
-/**
- * Append a formatted string to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param format The format string to append.
- * @param ... The arguments to the format string.
- */
-void pm_buffer_append_format(pm_buffer_t *buffer, const char *format, ...) PRISM_ATTRIBUTE_FORMAT(2, 3);
-
-/**
- * Append a string to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param value The string to append.
- * @param length The length of the string to append.
- */
-void pm_buffer_append_string(pm_buffer_t *buffer, const char *value, size_t length);
-
-/**
- * Append a list of bytes to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param value The bytes to append.
- * @param length The length of the bytes to append.
- */
-void pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length);
-
-/**
- * Append a single byte to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param value The byte to append.
- */
-void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value);
-
-/**
- * Append a 32-bit unsigned integer to the buffer as a variable-length integer.
- *
- * @param buffer The buffer to append to.
- * @param value The integer to append.
- */
-void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value);
-
-/**
- * Append a 32-bit signed integer to the buffer as a variable-length integer.
- *
- * @param buffer The buffer to append to.
- * @param value The integer to append.
- */
-void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value);
-
-/**
- * Append a double to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param value The double to append.
- */
-void pm_buffer_append_double(pm_buffer_t *buffer, double value);
-
-/**
- * Append a unicode codepoint to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param value The character to append.
- * @returns True if the codepoint was valid and appended successfully, false
- *   otherwise.
- */
-bool pm_buffer_append_unicode_codepoint(pm_buffer_t *buffer, uint32_t value);
-
-/**
- * The different types of escaping that can be performed by the buffer when
- * appending a slice of Ruby source code.
- */
-typedef enum {
-    PM_BUFFER_ESCAPING_RUBY,
-    PM_BUFFER_ESCAPING_JSON
-} pm_buffer_escaping_t;
-
-/**
- * Append a slice of source code to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param source The source code to append.
- * @param length The length of the source code to append.
- * @param escaping The type of escaping to perform.
- */
-void pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping);
-
-/**
- * Prepend the given string to the buffer.
- *
- * @param buffer The buffer to prepend to.
- * @param value The string to prepend.
- * @param length The length of the string to prepend.
- */
-void pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length);
-
-/**
- * Concatenate one buffer onto another.
- *
- * @param destination The buffer to concatenate onto.
- * @param source The buffer to concatenate.
- */
-void pm_buffer_concat(pm_buffer_t *destination, const pm_buffer_t *source);
-
-/**
- * Clear the buffer by reducing its size to 0. This does not free the allocated
- * memory, but it does allow the buffer to be reused.
- *
- * @param buffer The buffer to clear.
- */
-void pm_buffer_clear(pm_buffer_t *buffer);
-
-/**
- * Strip the whitespace from the end of the buffer.
- *
- * @param buffer The buffer to strip.
- */
-void pm_buffer_rstrip(pm_buffer_t *buffer);
-
-/**
- * Checks if the buffer includes the given value.
- *
- * @param buffer The buffer to check.
- * @param value The value to check for.
- * @returns The index of the first occurrence of the value in the buffer, or
- *   SIZE_MAX if the value is not found.
- */
-size_t pm_buffer_index(const pm_buffer_t *buffer, char value);
-
-/**
- * Insert the given string into the buffer at the given index.
- *
- * @param buffer The buffer to insert into.
- * @param index The index to insert at.
- * @param value The string to insert.
- * @param length The length of the string to insert.
- */
-void pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length);
-
-/**
- * Free the memory associated with the buffer.
- *
- * @param buffer The buffer to free.
- */
-PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer);
-
-#endif
diff --git a/prism/util/pm_char.h b/prism/util/pm_char.h
deleted file mode 100644
index deeafd6321..0000000000
--- a/prism/util/pm_char.h
+++ /dev/null
@@ -1,204 +0,0 @@
-/**
- * @file pm_char.h
- *
- * Functions for working with characters and strings.
- */
-#ifndef PRISM_CHAR_H
-#define PRISM_CHAR_H
-
-#include "prism/defines.h"
-#include "prism/util/pm_newline_list.h"
-
-#include <stdbool.h>
-#include <stddef.h>
-
-/**
- * Returns the number of characters at the start of the string that are
- * whitespace. Disallows searching past the given maximum number of characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @return The number of characters at the start of the string that are
- *     whitespace.
- */
-size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
-
-/**
- * Returns the number of characters at the start of the string that are
- * whitespace while also tracking the location of each newline. Disallows
- * searching past the given maximum number of characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @param newline_list The list of newlines to populate.
- * @return The number of characters at the start of the string that are
- *     whitespace.
- */
-size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list);
-
-/**
- * Returns the number of characters at the start of the string that are inline
- * whitespace. Disallows searching past the given maximum number of characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @return The number of characters at the start of the string that are inline
- *     whitespace.
- */
-size_t pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length);
-
-/**
- * Returns the number of characters at the start of the string that are decimal
- * digits. Disallows searching past the given maximum number of characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @return The number of characters at the start of the string that are decimal
- *     digits.
- */
-size_t pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length);
-
-/**
- * Returns the number of characters at the start of the string that are
- * hexadecimal digits. Disallows searching past the given maximum number of
- * characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @return The number of characters at the start of the string that are
- *     hexadecimal digits.
- */
-size_t pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
-
-/**
- * Returns the number of characters at the start of the string that are octal
- * digits or underscores. Disallows searching past the given maximum number of
- * characters.
- *
- * If multiple underscores are found in a row or if an underscore is
- * found at the end of the number, then the invalid pointer is set to the index
- * of the first invalid underscore.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @param invalid The pointer to set to the index of the first invalid
- *     underscore.
- * @return The number of characters at the start of the string that are octal
- *     digits or underscores.
- */
-size_t pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
-
-/**
- * Returns the number of characters at the start of the string that are decimal
- * digits or underscores. Disallows searching past the given maximum number of
- * characters.
- *
- * If multiple underscores are found in a row or if an underscore is
- * found at the end of the number, then the invalid pointer is set to the index
- * of the first invalid underscore.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @param invalid The pointer to set to the index of the first invalid
- *     underscore.
- * @return The number of characters at the start of the string that are decimal
- *     digits or underscores.
- */
-size_t pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
-
-/**
- * Returns the number of characters at the start of the string that are
- * hexadecimal digits or underscores. Disallows searching past the given maximum
- * number of characters.
- *
- * If multiple underscores are found in a row or if an underscore is
- * found at the end of the number, then the invalid pointer is set to the index
- * of the first invalid underscore.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @param invalid The pointer to set to the index of the first invalid
- *     underscore.
- * @return The number of characters at the start of the string that are
- *     hexadecimal digits or underscores.
- */
-size_t pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
-
-/**
- * Returns the number of characters at the start of the string that are regexp
- * options. Disallows searching past the given maximum number of characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @return The number of characters at the start of the string that are regexp
- *     options.
- */
-size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
-
-/**
- * Returns the number of characters at the start of the string that are binary
- * digits or underscores. Disallows searching past the given maximum number of
- * characters.
- *
- * If multiple underscores are found in a row or if an underscore is
- * found at the end of the number, then the invalid pointer is set to the index
- * of the first invalid underscore.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @param invalid The pointer to set to the index of the first invalid
- *     underscore.
- * @return The number of characters at the start of the string that are binary
- *     digits or underscores.
- */
-size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
-
-/**
- * Returns true if the given character is a whitespace character.
- *
- * @param b The character to check.
- * @return True if the given character is a whitespace character.
- */
-bool pm_char_is_whitespace(const uint8_t b);
-
-/**
- * Returns true if the given character is an inline whitespace character.
- *
- * @param b The character to check.
- * @return True if the given character is an inline whitespace character.
- */
-bool pm_char_is_inline_whitespace(const uint8_t b);
-
-/**
- * Returns true if the given character is a binary digit.
- *
- * @param b The character to check.
- * @return True if the given character is a binary digit.
- */
-bool pm_char_is_binary_digit(const uint8_t b);
-
-/**
- * Returns true if the given character is an octal digit.
- *
- * @param b The character to check.
- * @return True if the given character is an octal digit.
- */
-bool pm_char_is_octal_digit(const uint8_t b);
-
-/**
- * Returns true if the given character is a decimal digit.
- *
- * @param b The character to check.
- * @return True if the given character is a decimal digit.
- */
-bool pm_char_is_decimal_digit(const uint8_t b);
-
-/**
- * Returns true if the given character is a hexadecimal digit.
- *
- * @param b The character to check.
- * @return True if the given character is a hexadecimal digit.
- */
-bool pm_char_is_hexadecimal_digit(const uint8_t b);
-
-#endif
diff --git a/prism/util/pm_constant_pool.h b/prism/util/pm_constant_pool.h
deleted file mode 100644
index 6df23f8f50..0000000000
--- a/prism/util/pm_constant_pool.h
+++ /dev/null
@@ -1,218 +0,0 @@
-/**
- * @file pm_constant_pool.h
- *
- * A data structure that stores a set of strings.
- *
- * Each string is assigned a unique id, which can be used to compare strings for
- * equality. This comparison ends up being much faster than strcmp, since it
- * only requires a single integer comparison.
- */
-#ifndef PRISM_CONSTANT_POOL_H
-#define PRISM_CONSTANT_POOL_H
-
-#include "prism/defines.h"
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-
-/**
- * When we allocate constants into the pool, we reserve 0 to mean that the slot
- * is not yet filled. This constant is reused in other places to indicate the
- * lack of a constant id.
- */
-#define PM_CONSTANT_ID_UNSET 0
-
-/**
- * A constant id is a unique identifier for a constant in the constant pool.
- */
-typedef uint32_t pm_constant_id_t;
-
-/**
- * A list of constant IDs. Usually used to represent a set of locals.
- */
-typedef struct {
-    /** The number of constant ids in the list. */
-    size_t size;
-
-    /** The number of constant ids that have been allocated in the list. */
-    size_t capacity;
-
-    /** The constant ids in the list. */
-    pm_constant_id_t *ids;
-} pm_constant_id_list_t;
-
-/**
- * Initialize a list of constant ids.
- *
- * @param list The list to initialize.
- */
-void pm_constant_id_list_init(pm_constant_id_list_t *list);
-
-/**
- * Initialize a list of constant ids with a given capacity.
- *
- * @param list The list to initialize.
- * @param capacity The initial capacity of the list.
- */
-void pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity);
-
-/**
- * Append a constant id to a list of constant ids. Returns false if any
- * potential reallocations fail.
- *
- * @param list The list to append to.
- * @param id The id to append.
- * @return Whether the append succeeded.
- */
-bool pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id);
-
-/**
- * Insert a constant id into a list of constant ids at the specified index.
- *
- * @param list The list to insert into.
- * @param index The index at which to insert.
- * @param id The id to insert.
- */
-void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id);
-
-/**
- * Checks if the current constant id list includes the given constant id.
- *
- * @param list The list to check.
- * @param id The id to check for.
- * @return Whether the list includes the given id.
- */
-bool pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id);
-
-/**
- * Free the memory associated with a list of constant ids.
- *
- * @param list The list to free.
- */
-void pm_constant_id_list_free(pm_constant_id_list_t *list);
-
-/**
- * The type of bucket in the constant pool hash map. This determines how the
- * bucket should be freed.
- */
-typedef unsigned int pm_constant_pool_bucket_type_t;
-
-/** By default, each constant is a slice of the source. */
-static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_DEFAULT = 0;
-
-/** An owned constant is one for which memory has been allocated. */
-static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_OWNED = 1;
-
-/** A constant constant is known at compile time. */
-static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_CONSTANT = 2;
-
-/** A bucket in the hash map. */
-typedef struct {
-    /** The incremental ID used for indexing back into the pool. */
-    unsigned int id: 30;
-
-    /** The type of the bucket, which determines how to free it. */
-    pm_constant_pool_bucket_type_t type: 2;
-
-    /** The hash of the bucket. */
-    uint32_t hash;
-} pm_constant_pool_bucket_t;
-
-/** A constant in the pool which effectively stores a string. */
-typedef struct {
-    /** A pointer to the start of the string. */
-    const uint8_t *start;
-
-    /** The length of the string. */
-    size_t length;
-} pm_constant_t;
-
-/** The overall constant pool, which stores constants found while parsing. */
-typedef struct {
-    /** The buckets in the hash map. */
-    pm_constant_pool_bucket_t *buckets;
-
-    /** The constants that are stored in the buckets. */
-    pm_constant_t *constants;
-
-    /** The number of buckets in the hash map. */
-    uint32_t size;
-
-    /** The number of buckets that have been allocated in the hash map. */
-    uint32_t capacity;
-} pm_constant_pool_t;
-
-/**
- * Initialize a new constant pool with a given capacity.
- *
- * @param pool The pool to initialize.
- * @param capacity The initial capacity of the pool.
- * @return Whether the initialization succeeded.
- */
-bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity);
-
-/**
- * Return a pointer to the constant indicated by the given constant id.
- *
- * @param pool The pool to get the constant from.
- * @param constant_id The id of the constant to get.
- * @return A pointer to the constant.
- */
-pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id);
-
-/**
- * Find a constant in a constant pool. Returns the id of the constant, or 0 if
- * the constant is not found.
- *
- * @param pool The pool to find the constant in.
- * @param start A pointer to the start of the constant.
- * @param length The length of the constant.
- * @return The id of the constant.
- */
-pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length);
-
-/**
- * Insert a constant into a constant pool that is a slice of a source string.
- * Returns the id of the constant, or 0 if any potential calls to resize fail.
- *
- * @param pool The pool to insert the constant into.
- * @param start A pointer to the start of the constant.
- * @param length The length of the constant.
- * @return The id of the constant.
- */
-pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
-
-/**
- * Insert a constant into a constant pool from memory that is now owned by the
- * constant pool. Returns the id of the constant, or 0 if any potential calls to
- * resize fail.
- *
- * @param pool The pool to insert the constant into.
- * @param start A pointer to the start of the constant.
- * @param length The length of the constant.
- * @return The id of the constant.
- */
-pm_constant_id_t pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length);
-
-/**
- * Insert a constant into a constant pool from memory that is constant. Returns
- * the id of the constant, or 0 if any potential calls to resize fail.
- *
- * @param pool The pool to insert the constant into.
- * @param start A pointer to the start of the constant.
- * @param length The length of the constant.
- * @return The id of the constant.
- */
-pm_constant_id_t pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
-
-/**
- * Free the memory associated with a constant pool.
- *
- * @param pool The pool to free.
- */
-void pm_constant_pool_free(pm_constant_pool_t *pool);
-
-#endif
diff --git a/prism/util/pm_integer.h b/prism/util/pm_integer.h
deleted file mode 100644
index a9e2966703..0000000000
--- a/prism/util/pm_integer.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/**
- * @file pm_integer.h
- *
- * This module provides functions for working with arbitrary-sized integers.
- */
-#ifndef PRISM_NUMBER_H
-#define PRISM_NUMBER_H
-
-#include "prism/defines.h"
-#include "prism/util/pm_buffer.h"
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdlib.h>
-
-/**
- * A structure represents an arbitrary-sized integer.
- */
-typedef struct {
-    /**
-     * The number of allocated values. length is set to 0 if the integer fits
-     * into uint32_t.
-     */
-    size_t length;
-
-    /**
-     * List of 32-bit integers. Set to NULL if the integer fits into uint32_t.
-     */
-    uint32_t *values;
-
-    /**
-     * Embedded value for small integer. This value is set to 0 if the value
-     * does not fit into uint32_t.
-     */
-    uint32_t value;
-
-    /**
-     * Whether or not the integer is negative. It is stored this way so that a
-     * zeroed pm_integer_t is always positive zero.
-     */
-    bool negative;
-} pm_integer_t;
-
-/**
- * An enum controlling the base of an integer. It is expected that the base is
- * already known before parsing the integer, even though it could be derived
- * from the string itself.
- */
-typedef enum {
-    /** The default decimal base, with no prefix. Leading 0s will be ignored. */
-    PM_INTEGER_BASE_DEFAULT,
-
-    /** The binary base, indicated by a 0b or 0B prefix. */
-    PM_INTEGER_BASE_BINARY,
-
-    /** The octal base, indicated by a 0, 0o, or 0O prefix. */
-    PM_INTEGER_BASE_OCTAL,
-
-    /** The decimal base, indicated by a 0d, 0D, or empty prefix. */
-    PM_INTEGER_BASE_DECIMAL,
-
-    /** The hexadecimal base, indicated by a 0x or 0X prefix. */
-    PM_INTEGER_BASE_HEXADECIMAL,
-
-    /**
-     * An unknown base, in which case pm_integer_parse will derive it based on
-     * the content of the string. This is less efficient and does more
-     * comparisons, so if callers know the base ahead of time, they should use
-     * that instead.
-     */
-    PM_INTEGER_BASE_UNKNOWN
-} pm_integer_base_t;
-
-/**
- * Parse an integer from a string. This assumes that the format of the integer
- * has already been validated, as internal validation checks are not performed
- * here.
- *
- * @param integer The integer to parse into.
- * @param base The base of the integer.
- * @param start The start of the string.
- * @param end The end of the string.
- */
-void pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end);
-
-/**
- * Compare two integers. This function returns -1 if the left integer is less
- * than the right integer, 0 if they are equal, and 1 if the left integer is
- * greater than the right integer.
- *
- * @param left The left integer to compare.
- * @param right The right integer to compare.
- * @return The result of the comparison.
- */
-int pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right);
-
-/**
- * Reduce a ratio of integers to its simplest form.
- *
- * If either the numerator or denominator do not fit into a 32-bit integer, then
- * this function is a no-op. In the future, we may consider reducing even the
- * larger numbers, but for now we're going to keep it simple.
- *
- * @param numerator The numerator of the ratio.
- * @param denominator The denominator of the ratio.
- */
-void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator);
-
-/**
- * Convert an integer to a decimal string.
- *
- * @param buffer The buffer to append the string to.
- * @param integer The integer to convert to a string.
- */
-PRISM_EXPORTED_FUNCTION void pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer);
-
-/**
- * Free the internal memory of an integer. This memory will only be allocated if
- * the integer exceeds the size of a single node in the linked list.
- *
- * @param integer The integer to free.
- */
-PRISM_EXPORTED_FUNCTION void pm_integer_free(pm_integer_t *integer);
-
-#endif
diff --git a/prism/util/pm_list.c b/prism/util/pm_list.c
deleted file mode 100644
index ad2294cd60..0000000000
--- a/prism/util/pm_list.c
+++ /dev/null
@@ -1,49 +0,0 @@
-#include "prism/util/pm_list.h"
-
-/**
- * Returns true if the given list is empty.
- */
-PRISM_EXPORTED_FUNCTION bool
-pm_list_empty_p(pm_list_t *list) {
-    return list->head == NULL;
-}
-
-/**
- * Returns the size of the list.
- */
-PRISM_EXPORTED_FUNCTION size_t
-pm_list_size(pm_list_t *list) {
-    return list->size;
-}
-
-/**
- * Append a node to the given list.
- */
-void
-pm_list_append(pm_list_t *list, pm_list_node_t *node) {
-    if (list->head == NULL) {
-        list->head = node;
-    } else {
-        list->tail->next = node;
-    }
-
-    list->tail = node;
-    list->size++;
-}
-
-/**
- * Deallocate the internal state of the given list.
- */
-PRISM_EXPORTED_FUNCTION void
-pm_list_free(pm_list_t *list) {
-    pm_list_node_t *node = list->head;
-    pm_list_node_t *next;
-
-    while (node != NULL) {
-        next = node->next;
-        xfree(node);
-        node = next;
-    }
-
-    list->size = 0;
-}
diff --git a/prism/util/pm_memchr.h b/prism/util/pm_memchr.h
deleted file mode 100644
index e0671eaed3..0000000000
--- a/prism/util/pm_memchr.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/**
- * @file pm_memchr.h
- *
- * A custom memchr implementation.
- */
-#ifndef PRISM_MEMCHR_H
-#define PRISM_MEMCHR_H
-
-#include "prism/defines.h"
-#include "prism/encoding.h"
-
-#include <stddef.h>
-
-/**
- * We need to roll our own memchr to handle cases where the encoding changes and
- * we need to search for a character in a buffer that could be the trailing byte
- * of a multibyte character.
- *
- * @param source The source string.
- * @param character The character to search for.
- * @param number The maximum number of bytes to search.
- * @param encoding_changed Whether the encoding changed.
- * @param encoding A pointer to the encoding.
- * @return A pointer to the first occurrence of the character in the source
- *     string, or NULL if no such character exists.
- */
-void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding);
-
-#endif
diff --git a/prism/util/pm_newline_list.c b/prism/util/pm_newline_list.c
deleted file mode 100644
index 8331618f54..0000000000
--- a/prism/util/pm_newline_list.c
+++ /dev/null
@@ -1,125 +0,0 @@
-#include "prism/util/pm_newline_list.h"
-
-/**
- * Initialize a new newline list with the given capacity. Returns true if the
- * allocation of the offsets succeeds, otherwise returns false.
- */
-bool
-pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) {
-    list->offsets = (size_t *) xcalloc(capacity, sizeof(size_t));
-    if (list->offsets == NULL) return false;
-
-    list->start = start;
-
-    // This is 1 instead of 0 because we want to include the first line of the
-    // file as having offset 0, which is set because of calloc.
-    list->size = 1;
-    list->capacity = capacity;
-
-    return true;
-}
-
-/**
- * Clear out the newlines that have been appended to the list.
- */
-void
-pm_newline_list_clear(pm_newline_list_t *list) {
-    list->size = 1;
-}
-
-/**
- * Append a new offset to the newline list. Returns true if the reallocation of
- * the offsets succeeds (if one was necessary), otherwise returns false.
- */
-bool
-pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
-    if (list->size == list->capacity) {
-        size_t *original_offsets = list->offsets;
-
-        list->capacity = (list->capacity * 3) / 2;
-        list->offsets = (size_t *) xcalloc(list->capacity, sizeof(size_t));
-        if (list->offsets == NULL) return false;
-
-        memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
-        xfree(original_offsets);
-    }
-
-    assert(*cursor == '\n');
-    assert(cursor >= list->start);
-    size_t newline_offset = (size_t) (cursor - list->start + 1);
-
-    assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
-    list->offsets[list->size++] = newline_offset;
-
-    return true;
-}
-
-/**
- * Returns the line of the given offset. If the offset is not in the list, the
- * line of the closest offset less than the given offset is returned.
- */
-int32_t
-pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) {
-    assert(cursor >= list->start);
-    size_t offset = (size_t) (cursor - list->start);
-
-    size_t left = 0;
-    size_t right = list->size - 1;
-
-    while (left <= right) {
-        size_t mid = left + (right - left) / 2;
-
-        if (list->offsets[mid] == offset) {
-            return ((int32_t) mid) + start_line;
-        }
-
-        if (list->offsets[mid] < offset) {
-            left = mid + 1;
-        } else {
-            right = mid - 1;
-        }
-    }
-
-    return ((int32_t) left) + start_line - 1;
-}
-
-/**
- * Returns the line and column of the given offset. If the offset is not in the
- * list, the line and column of the closest offset less than the given offset
- * are returned.
- */
-pm_line_column_t
-pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) {
-    assert(cursor >= list->start);
-    size_t offset = (size_t) (cursor - list->start);
-
-    size_t left = 0;
-    size_t right = list->size - 1;
-
-    while (left <= right) {
-        size_t mid = left + (right - left) / 2;
-
-        if (list->offsets[mid] == offset) {
-            return ((pm_line_column_t) { ((int32_t) mid) + start_line, 0 });
-        }
-
-        if (list->offsets[mid] < offset) {
-            left = mid + 1;
-        } else {
-            right = mid - 1;
-        }
-    }
-
-    return ((pm_line_column_t) {
-        .line = ((int32_t) left) + start_line - 1,
-        .column = (uint32_t) (offset - list->offsets[left - 1])
-    });
-}
-
-/**
- * Free the internal memory allocated for the newline list.
- */
-void
-pm_newline_list_free(pm_newline_list_t *list) {
-    xfree(list->offsets);
-}
diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h
deleted file mode 100644
index 406abe8ba5..0000000000
--- a/prism/util/pm_newline_list.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/**
- * @file pm_newline_list.h
- *
- * A list of byte offsets of newlines in a string.
- *
- * When compiling the syntax tree, it's necessary to know the line and column
- * of many nodes. This is necessary to support things like error messages,
- * tracepoints, etc.
- *
- * It's possible that we could store the start line, start column, end line, and
- * end column on every node in addition to the offsets that we already store,
- * but that would be quite a lot of memory overhead.
- */
-#ifndef PRISM_NEWLINE_LIST_H
-#define PRISM_NEWLINE_LIST_H
-
-#include "prism/defines.h"
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdlib.h>
-
-/**
- * A list of offsets of newlines in a string. The offsets are assumed to be
- * sorted/inserted in ascending order.
- */
-typedef struct {
-    /** A pointer to the start of the source string. */
-    const uint8_t *start;
-
-    /** The number of offsets in the list. */
-    size_t size;
-
-    /** The capacity of the list that has been allocated. */
-    size_t capacity;
-
-    /** The list of offsets. */
-    size_t *offsets;
-} pm_newline_list_t;
-
-/**
- * A line and column in a string.
- */
-typedef struct {
-    /** The line number. */
-    int32_t line;
-
-    /** The column number. */
-    uint32_t column;
-} pm_line_column_t;
-
-/**
- * Initialize a new newline list with the given capacity. Returns true if the
- * allocation of the offsets succeeds, otherwise returns false.
- *
- * @param list The list to initialize.
- * @param start A pointer to the start of the source string.
- * @param capacity The initial capacity of the list.
- * @return True if the allocation of the offsets succeeds, otherwise false.
- */
-bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity);
-
-/**
- * Clear out the newlines that have been appended to the list.
- *
- * @param list The list to clear.
- */
-void
-pm_newline_list_clear(pm_newline_list_t *list);
-
-/**
- * Append a new offset to the newline list. Returns true if the reallocation of
- * the offsets succeeds (if one was necessary), otherwise returns false.
- *
- * @param list The list to append to.
- * @param cursor A pointer to the offset to append.
- * @return True if the reallocation of the offsets succeeds (if one was
- *     necessary), otherwise false.
- */
-bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor);
-
-/**
- * Returns the line of the given offset. If the offset is not in the list, the
- * line of the closest offset less than the given offset is returned.
- *
- * @param list The list to search.
- * @param cursor A pointer to the offset to search for.
- * @param start_line The line to start counting from.
- * @return The line of the given offset.
- */
-int32_t pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line);
-
-/**
- * Returns the line and column of the given offset. If the offset is not in the
- * list, the line and column of the closest offset less than the given offset
- * are returned.
- *
- * @param list The list to search.
- * @param cursor A pointer to the offset to search for.
- * @param start_line The line to start counting from.
- * @return The line and column of the given offset.
- */
-pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line);
-
-/**
- * Free the internal memory allocated for the newline list.
- *
- * @param list The list to free.
- */
-void pm_newline_list_free(pm_newline_list_t *list);
-
-#endif
diff --git a/prism/util/pm_string.c b/prism/util/pm_string.c
deleted file mode 100644
index 75422fbdf2..0000000000
--- a/prism/util/pm_string.c
+++ /dev/null
@@ -1,383 +0,0 @@
-#include "prism/util/pm_string.h"
-
-/**
- * Returns the size of the pm_string_t struct. This is necessary to allocate the
- * correct amount of memory in the FFI backend.
- */
-PRISM_EXPORTED_FUNCTION size_t
-pm_string_sizeof(void) {
-    return sizeof(pm_string_t);
-}
-
-/**
- * Initialize a shared string that is based on initial input.
- */
-void
-pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end) {
-    assert(start <= end);
-
-    *string = (pm_string_t) {
-        .type = PM_STRING_SHARED,
-        .source = start,
-        .length = (size_t) (end - start)
-    };
-}
-
-/**
- * Initialize an owned string that is responsible for freeing allocated memory.
- */
-void
-pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) {
-    *string = (pm_string_t) {
-        .type = PM_STRING_OWNED,
-        .source = source,
-        .length = length
-    };
-}
-
-/**
- * Initialize a constant string that doesn't own its memory source.
- */
-void
-pm_string_constant_init(pm_string_t *string, const char *source, size_t length) {
-    *string = (pm_string_t) {
-        .type = PM_STRING_CONSTANT,
-        .source = (const uint8_t *) source,
-        .length = length
-    };
-}
-
-#ifdef _WIN32
-/**
- * Represents a file handle on Windows, where the path will need to be freed
- * when the file is closed.
- */
-typedef struct {
-    /** The path to the file, which will become allocated memory. */
-    WCHAR *path;
-
-    /** The handle to the file, which will start as uninitialized memory. */
-    HANDLE file;
-} pm_string_file_handle_t;
-
-/**
- * Open the file indicated by the filepath parameter for reading on Windows.
- * Perform any kind of normalization that needs to happen on the filepath.
- */
-static pm_string_init_result_t
-pm_string_file_handle_open(pm_string_file_handle_t *handle, const char *filepath) {
-    int length = MultiByteToWideChar(CP_UTF8, 0, filepath, -1, NULL, 0);
-    if (length == 0) return PM_STRING_INIT_ERROR_GENERIC;
-
-    handle->path = xmalloc(sizeof(WCHAR) * ((size_t) length));
-    if ((handle->path == NULL) || (MultiByteToWideChar(CP_UTF8, 0, filepath, -1, handle->path, length) == 0)) {
-        xfree(handle->path);
-        return PM_STRING_INIT_ERROR_GENERIC;
-    }
-
-    handle->file = CreateFileW(handle->path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL);
-    if (handle->file == INVALID_HANDLE_VALUE) {
-        pm_string_init_result_t result = PM_STRING_INIT_ERROR_GENERIC;
-
-        if (GetLastError() == ERROR_ACCESS_DENIED) {
-            DWORD attributes = GetFileAttributesW(handle->path);
-            if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
-                result = PM_STRING_INIT_ERROR_DIRECTORY;
-            }
-        }
-
-        xfree(handle->path);
-        return result;
-    }
-
-    return PM_STRING_INIT_SUCCESS;
-}
-
-/**
- * Close the file handle and free the path.
- */
-static void
-pm_string_file_handle_close(pm_string_file_handle_t *handle) {
-    xfree(handle->path);
-    CloseHandle(handle->file);
-}
-#endif
-
-/**
- * Read the file indicated by the filepath parameter into source and load its
- * contents and size into the given `pm_string_t`. The given `pm_string_t`
- * should be freed using `pm_string_free` when it is no longer used.
- *
- * We want to use demand paging as much as possible in order to avoid having to
- * read the entire file into memory (which could be detrimental to performance
- * for large files). This means that if we're on windows we'll use
- * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
- * `mmap`, and on other POSIX systems we'll use `read`.
- */
-PRISM_EXPORTED_FUNCTION pm_string_init_result_t
-pm_string_mapped_init(pm_string_t *string, const char *filepath) {
-#ifdef _WIN32
-    // Open the file for reading.
-    pm_string_file_handle_t handle;
-    pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath);
-    if (result != PM_STRING_INIT_SUCCESS) return result;
-
-    // Get the file size.
-    DWORD file_size = GetFileSize(handle.file, NULL);
-    if (file_size == INVALID_FILE_SIZE) {
-        pm_string_file_handle_close(&handle);
-        return PM_STRING_INIT_ERROR_GENERIC;
-    }
-
-    // If the file is empty, then we don't need to do anything else, we'll set
-    // the source to a constant empty string and return.
-    if (file_size == 0) {
-        pm_string_file_handle_close(&handle);
-        const uint8_t source[] = "";
-        *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
-        return PM_STRING_INIT_SUCCESS;
-    }
-
-    // Create a mapping of the file.
-    HANDLE mapping = CreateFileMapping(handle.file, NULL, PAGE_READONLY, 0, 0, NULL);
-    if (mapping == NULL) {
-        pm_string_file_handle_close(&handle);
-        return PM_STRING_INIT_ERROR_GENERIC;
-    }
-
-    // Map the file into memory.
-    uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
-    CloseHandle(mapping);
-    pm_string_file_handle_close(&handle);
-
-    if (source == NULL) {
-        return PM_STRING_INIT_ERROR_GENERIC;
-    }
-
-    *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = (size_t) file_size };
-    return PM_STRING_INIT_SUCCESS;
-#elif defined(_POSIX_MAPPED_FILES)
-    // Open the file for reading
-    int fd = open(filepath, O_RDONLY);
-    if (fd == -1) {
-        return PM_STRING_INIT_ERROR_GENERIC;
-    }
-
-    // Stat the file to get the file size
-    struct stat sb;
-    if (fstat(fd, &sb) == -1) {
-        close(fd);
-        return PM_STRING_INIT_ERROR_GENERIC;
-    }
-
-    // Ensure it is a file and not a directory
-    if (S_ISDIR(sb.st_mode)) {
-        close(fd);
-        return PM_STRING_INIT_ERROR_DIRECTORY;
-    }
-
-    // mmap the file descriptor to virtually get the contents
-    size_t size = (size_t) sb.st_size;
-    uint8_t *source = NULL;
-
-    if (size == 0) {
-        close(fd);
-        const uint8_t source[] = "";
-        *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
-        return PM_STRING_INIT_SUCCESS;
-    }
-
-    source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
-    if (source == MAP_FAILED) {
-        close(fd);
-        return PM_STRING_INIT_ERROR_GENERIC;
-    }
-
-    close(fd);
-    *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = size };
-    return PM_STRING_INIT_SUCCESS;
-#else
-    return pm_string_file_init(string, filepath);
-#endif
-}
-
-/**
- * Read the file indicated by the filepath parameter into source and load its
- * contents and size into the given `pm_string_t`. The given `pm_string_t`
- * should be freed using `pm_string_free` when it is no longer used.
- */
-PRISM_EXPORTED_FUNCTION pm_string_init_result_t
-pm_string_file_init(pm_string_t *string, const char *filepath) {
-#ifdef _WIN32
-    // Open the file for reading.
-    pm_string_file_handle_t handle;
-    pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath);
-    if (result != PM_STRING_INIT_SUCCESS) return result;
-
-    // Get the file size.
-    DWORD file_size = GetFileSize(handle.file, NULL);
-    if (file_size == INVALID_FILE_SIZE) {
-        pm_string_file_handle_close(&handle);
-        return PM_STRING_INIT_ERROR_GENERIC;
-    }
-
-    // If the file is empty, then we don't need to do anything else, we'll set
-    // the source to a constant empty string and return.
-    if (file_size == 0) {
-        pm_string_file_handle_close(&handle);
-        const uint8_t source[] = "";
-        *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
-        return PM_STRING_INIT_SUCCESS;
-    }
-
-    // Create a buffer to read the file into.
-    uint8_t *source = xmalloc(file_size);
-    if (source == NULL) {
-        pm_string_file_handle_close(&handle);
-        return PM_STRING_INIT_ERROR_GENERIC;
-    }
-
-    // Read the contents of the file
-    DWORD bytes_read;
-    if (!ReadFile(handle.file, source, file_size, &bytes_read, NULL)) {
-        pm_string_file_handle_close(&handle);
-        return PM_STRING_INIT_ERROR_GENERIC;
-    }
-
-    // Check the number of bytes read
-    if (bytes_read != file_size) {
-        xfree(source);
-        pm_string_file_handle_close(&handle);
-        return PM_STRING_INIT_ERROR_GENERIC;
-    }
-
-    pm_string_file_handle_close(&handle);
-    *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = (size_t) file_size };
-    return PM_STRING_INIT_SUCCESS;
-#elif defined(PRISM_HAS_FILESYSTEM)
-    // Open the file for reading
-    int fd = open(filepath, O_RDONLY);
-    if (fd == -1) {
-        return PM_STRING_INIT_ERROR_GENERIC;
-    }
-
-    // Stat the file to get the file size
-    struct stat sb;
-    if (fstat(fd, &sb) == -1) {
-        close(fd);
-        return PM_STRING_INIT_ERROR_GENERIC;
-    }
-
-    // Ensure it is a file and not a directory
-    if (S_ISDIR(sb.st_mode)) {
-        close(fd);
-        return PM_STRING_INIT_ERROR_DIRECTORY;
-    }
-
-    // Check the size to see if it's empty
-    size_t size = (size_t) sb.st_size;
-    if (size == 0) {
-        close(fd);
-        const uint8_t source[] = "";
-        *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
-        return PM_STRING_INIT_SUCCESS;
-    }
-
-    size_t length = (size_t) size;
-    uint8_t *source = xmalloc(length);
-    if (source == NULL) {
-        close(fd);
-        return PM_STRING_INIT_ERROR_GENERIC;
-    }
-
-    long bytes_read = (long) read(fd, source, length);
-    close(fd);
-
-    if (bytes_read == -1) {
-        xfree(source);
-        return PM_STRING_INIT_ERROR_GENERIC;
-    }
-
-    *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = length };
-    return PM_STRING_INIT_SUCCESS;
-#else
-    (void) string;
-    (void) filepath;
-    perror("pm_string_file_init is not implemented for this platform");
-    return PM_STRING_INIT_ERROR_GENERIC;
-#endif
-}
-
-/**
- * Ensure the string is owned. If it is not, then reinitialize it as owned and
- * copy over the previous source.
- */
-void
-pm_string_ensure_owned(pm_string_t *string) {
-    if (string->type == PM_STRING_OWNED) return;
-
-    size_t length = pm_string_length(string);
-    const uint8_t *source = pm_string_source(string);
-
-    uint8_t *memory = xmalloc(length);
-    if (!memory) return;
-
-    pm_string_owned_init(string, memory, length);
-    memcpy((void *) string->source, source, length);
-}
-
-/**
- * Compare the underlying lengths and bytes of two strings. Returns 0 if the
- * strings are equal, a negative number if the left string is less than the
- * right string, and a positive number if the left string is greater than the
- * right string.
- */
-int
-pm_string_compare(const pm_string_t *left, const pm_string_t *right) {
-    size_t left_length = pm_string_length(left);
-    size_t right_length = pm_string_length(right);
-
-    if (left_length < right_length) {
-        return -1;
-    } else if (left_length > right_length) {
-        return 1;
-    }
-
-    return memcmp(pm_string_source(left), pm_string_source(right), left_length);
-}
-
-/**
- * Returns the length associated with the string.
- */
-PRISM_EXPORTED_FUNCTION size_t
-pm_string_length(const pm_string_t *string) {
-    return string->length;
-}
-
-/**
- * Returns the start pointer associated with the string.
- */
-PRISM_EXPORTED_FUNCTION const uint8_t *
-pm_string_source(const pm_string_t *string) {
-    return string->source;
-}
-
-/**
- * Free the associated memory of the given string.
- */
-PRISM_EXPORTED_FUNCTION void
-pm_string_free(pm_string_t *string) {
-    void *memory = (void *) string->source;
-
-    if (string->type == PM_STRING_OWNED) {
-        xfree(memory);
-#ifdef PRISM_HAS_MMAP
-    } else if (string->type == PM_STRING_MAPPED && string->length) {
-#if defined(_WIN32)
-        UnmapViewOfFile(memory);
-#elif defined(_POSIX_MAPPED_FILES)
-        munmap(memory, string->length);
-#endif
-#endif /* PRISM_HAS_MMAP */
-    }
-}
diff --git a/prism/util/pm_string.h b/prism/util/pm_string.h
deleted file mode 100644
index f99f1abdf3..0000000000
--- a/prism/util/pm_string.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/**
- * @file pm_string.h
- *
- * A generic string type that can have various ownership semantics.
- */
-#ifndef PRISM_STRING_H
-#define PRISM_STRING_H
-
-#include "prism/defines.h"
-
-#include <assert.h>
-#include <errno.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
-
-// The following headers are necessary to read files using demand paging.
-#ifdef _WIN32
-#include <windows.h>
-#elif defined(_POSIX_MAPPED_FILES)
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#elif defined(PRISM_HAS_FILESYSTEM)
-#include <fcntl.h>
-#include <sys/stat.h>
-#endif
-
-/**
- * A generic string type that can have various ownership semantics.
- */
-typedef struct {
-    /** A pointer to the start of the string. */
-    const uint8_t *source;
-
-    /** The length of the string in bytes of memory. */
-    size_t length;
-
-    /** The type of the string. This field determines how the string should be freed. */
-    enum {
-        /** This string is a constant string, and should not be freed. */
-        PM_STRING_CONSTANT,
-
-        /** This is a slice of another string, and should not be freed. */
-        PM_STRING_SHARED,
-
-        /** This string owns its memory, and should be freed using `pm_string_free`. */
-        PM_STRING_OWNED,
-
-#ifdef PRISM_HAS_MMAP
-        /** This string is a memory-mapped file, and should be freed using `pm_string_free`. */
-        PM_STRING_MAPPED
-#endif
-    } type;
-} pm_string_t;
-
-/**
- * Returns the size of the pm_string_t struct. This is necessary to allocate the
- * correct amount of memory in the FFI backend.
- *
- * @return The size of the pm_string_t struct.
- */
-PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void);
-
-/**
- * Defines an empty string. This is useful for initializing a string that will
- * be filled in later.
- */
-#define PM_STRING_EMPTY ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 })
-
-/**
- * Initialize a shared string that is based on initial input.
- *
- * @param string The string to initialize.
- * @param start The start of the string.
- * @param end The end of the string.
- */
-void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end);
-
-/**
- * Initialize an owned string that is responsible for freeing allocated memory.
- *
- * @param string The string to initialize.
- * @param source The source of the string.
- * @param length The length of the string.
- */
-void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length);
-
-/**
- * Initialize a constant string that doesn't own its memory source.
- *
- * @param string The string to initialize.
- * @param source The source of the string.
- * @param length The length of the string.
- */
-void pm_string_constant_init(pm_string_t *string, const char *source, size_t length);
-
-/**
- * Represents the result of calling pm_string_mapped_init or
- * pm_string_file_init. We need this additional information because there is
- * not a platform-agnostic way to indicate that the file that was attempted to
- * be opened was a directory.
- */
-typedef enum {
-    /** Indicates that the string was successfully initialized. */
-    PM_STRING_INIT_SUCCESS = 0,
-    /**
-     * Indicates a generic error from a string_*_init function, where the type
-     * of error should be read from `errno` or `GetLastError()`.
-     */
-    PM_STRING_INIT_ERROR_GENERIC = 1,
-    /**
-     * Indicates that the file that was attempted to be opened was a directory.
-     */
-    PM_STRING_INIT_ERROR_DIRECTORY = 2
-} pm_string_init_result_t;
-
-/**
- * Read the file indicated by the filepath parameter into source and load its
- * contents and size into the given `pm_string_t`. The given `pm_string_t`
- * should be freed using `pm_string_free` when it is no longer used.
- *
- * We want to use demand paging as much as possible in order to avoid having to
- * read the entire file into memory (which could be detrimental to performance
- * for large files). This means that if we're on windows we'll use
- * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
- * `mmap`, and on other POSIX systems we'll use `read`.
- *
- * @param string The string to initialize.
- * @param filepath The filepath to read.
- * @return The success of the read, indicated by the value of the enum.
- */
-PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_mapped_init(pm_string_t *string, const char *filepath);
-
-/**
- * Read the file indicated by the filepath parameter into source and load its
- * contents and size into the given `pm_string_t`. The given `pm_string_t`
- * should be freed using `pm_string_free` when it is no longer used.
- *
- * @param string The string to initialize.
- * @param filepath The filepath to read.
- * @return The success of the read, indicated by the value of the enum.
- */
-PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_file_init(pm_string_t *string, const char *filepath);
-
-/**
- * Ensure the string is owned. If it is not, then reinitialize it as owned and
- * copy over the previous source.
- *
- * @param string The string to ensure is owned.
- */
-void pm_string_ensure_owned(pm_string_t *string);
-
-/**
- * Compare the underlying lengths and bytes of two strings. Returns 0 if the
- * strings are equal, a negative number if the left string is less than the
- * right string, and a positive number if the left string is greater than the
- * right string.
- *
- * @param left The left string to compare.
- * @param right The right string to compare.
- * @return The comparison result.
- */
-int pm_string_compare(const pm_string_t *left, const pm_string_t *right);
-
-/**
- * Returns the length associated with the string.
- *
- * @param string The string to get the length of.
- * @return The length of the string.
- */
-PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string);
-
-/**
- * Returns the start pointer associated with the string.
- *
- * @param string The string to get the start pointer of.
- * @return The start pointer of the string.
- */
-PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string);
-
-/**
- * Free the associated memory of the given string.
- *
- * @param string The string to free.
- */
-PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string);
-
-#endif
diff --git a/prism/util/pm_strpbrk.c b/prism/util/pm_strpbrk.c
deleted file mode 100644
index 916a4cc3fd..0000000000
--- a/prism/util/pm_strpbrk.c
+++ /dev/null
@@ -1,206 +0,0 @@
-#include "prism/util/pm_strpbrk.h"
-
-/**
- * Add an invalid multibyte character error to the parser.
- */
-static inline void
-pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
-    pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start);
-}
-
-/**
- * Set the explicit encoding for the parser to the current encoding.
- */
-static inline void
-pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, const uint8_t *source, size_t width) {
-    if (parser->explicit_encoding != NULL) {
-        if (parser->explicit_encoding == parser->encoding) {
-            // Okay, we already locked to this encoding.
-        } else if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
-            // Not okay, we already found a Unicode escape sequence and this
-            // conflicts.
-            pm_diagnostic_list_append_format(&parser->error_list, source, source + width, PM_ERR_MIXED_ENCODING, parser->encoding->name);
-        } else {
-            // Should not be anything else.
-            assert(false && "unreachable");
-        }
-    }
-
-    parser->explicit_encoding = parser->encoding;
-}
-
-/**
- * This is the default path.
- */
-static inline const uint8_t *
-pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
-    size_t index = 0;
-
-    while (index < maximum) {
-        if (strchr((const char *) charset, source[index]) != NULL) {
-            return source + index;
-        }
-
-        if (source[index] < 0x80) {
-            index++;
-        } else {
-            size_t width = pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index));
-
-            if (width > 0) {
-                index += width;
-            } else if (!validate) {
-                index++;
-            } else {
-                // At this point we know we have an invalid multibyte character.
-                // We'll walk forward as far as we can until we find the next
-                // valid character so that we don't spam the user with a ton of
-                // the same kind of error.
-                const size_t start = index;
-
-                do {
-                    index++;
-                } while (index < maximum && pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
-
-                pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
-            }
-        }
-    }
-
-    return NULL;
-}
-
-/**
- * This is the path when the encoding is ASCII-8BIT.
- */
-static inline const uint8_t *
-pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
-    size_t index = 0;
-
-    while (index < maximum) {
-        if (strchr((const char *) charset, source[index]) != NULL) {
-            return source + index;
-        }
-
-        if (validate && source[index] >= 0x80) pm_strpbrk_explicit_encoding_set(parser, source, 1);
-        index++;
-    }
-
-    return NULL;
-}
-
-/**
- * This is the slow path that does care about the encoding.
- */
-static inline const uint8_t *
-pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
-    size_t index = 0;
-    const pm_encoding_t *encoding = parser->encoding;
-
-    while (index < maximum) {
-        if (strchr((const char *) charset, source[index]) != NULL) {
-            return source + index;
-        }
-
-        if (source[index] < 0x80) {
-            index++;
-        } else {
-            size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
-            if (validate) pm_strpbrk_explicit_encoding_set(parser, source, width);
-
-            if (width > 0) {
-                index += width;
-            } else if (!validate) {
-                index++;
-            } else {
-                // At this point we know we have an invalid multibyte character.
-                // We'll walk forward as far as we can until we find the next
-                // valid character so that we don't spam the user with a ton of
-                // the same kind of error.
-                const size_t start = index;
-
-                do {
-                    index++;
-                } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
-
-                pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
-            }
-        }
-    }
-
-    return NULL;
-}
-
-/**
- * This is the fast path that does not care about the encoding because we know
- * the encoding only supports single-byte characters.
- */
-static inline const uint8_t *
-pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
-    size_t index = 0;
-    const pm_encoding_t *encoding = parser->encoding;
-
-    while (index < maximum) {
-        if (strchr((const char *) charset, source[index]) != NULL) {
-            return source + index;
-        }
-
-        if (source[index] < 0x80 || !validate) {
-            index++;
-        } else {
-            size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
-            pm_strpbrk_explicit_encoding_set(parser, source, width);
-
-            if (width > 0) {
-                index += width;
-            } else {
-                // At this point we know we have an invalid multibyte character.
-                // We'll walk forward as far as we can until we find the next
-                // valid character so that we don't spam the user with a ton of
-                // the same kind of error.
-                const size_t start = index;
-
-                do {
-                    index++;
-                } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
-
-                pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
-            }
-        }
-    }
-
-    return NULL;
-}
-
-/**
- * Here we have rolled our own version of strpbrk. The standard library strpbrk
- * has undefined behavior when the source string is not null-terminated. We want
- * to support strings that are not null-terminated because pm_parse does not
- * have the contract that the string is null-terminated. (This is desirable
- * because it means the extension can call pm_parse with the result of a call to
- * mmap).
- *
- * The standard library strpbrk also does not support passing a maximum length
- * to search. We want to support this for the reason mentioned above, but we
- * also don't want it to stop on null bytes. Ruby actually allows null bytes
- * within strings, comments, regular expressions, etc. So we need to be able to
- * skip past them.
- *
- * Finally, we want to support encodings wherein the charset could contain
- * characters that are trailing bytes of multi-byte characters. For example, in
- * Shift_JIS, the backslash character can be a trailing byte. In that case we
- * need to take a slower path and iterate one multi-byte character at a time.
- */
-const uint8_t *
-pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate) {
-    if (length <= 0) {
-        return NULL;
-    } else if (!parser->encoding_changed) {
-        return pm_strpbrk_utf8(parser, source, charset, (size_t) length, validate);
-    } else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
-        return pm_strpbrk_ascii_8bit(parser, source, charset, (size_t) length, validate);
-    } else if (parser->encoding->multibyte) {
-        return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length, validate);
-    } else {
-        return pm_strpbrk_single_byte(parser, source, charset, (size_t) length, validate);
-    }
-}
diff --git a/prism/version.h b/prism/version.h
index 0a2a8c8fce..181b398462 100644
--- a/prism/version.h
+++ b/prism/version.h
@@ -6,6 +6,8 @@
 #ifndef PRISM_VERSION_H
 #define PRISM_VERSION_H
 
+#include "prism/compiler/exported.h"
+
 /**
  * The major version of the Prism library as an int.
  */
@@ -14,7 +16,7 @@
 /**
  * The minor version of the Prism library as an int.
  */
-#define PRISM_VERSION_MINOR 4
+#define PRISM_VERSION_MINOR 9
 
 /**
  * The patch version of the Prism library as an int.
@@ -24,6 +26,13 @@
 /**
  * The version of the Prism library as a constant string.
  */
-#define PRISM_VERSION "1.4.0"
+#define PRISM_VERSION "1.9.0"
+
+/**
+ * The prism version and the serialization format.
+ *
+ * @return The prism version as a constant string.
+ */
+PRISM_EXPORTED_FUNCTION const char * pm_version(void);
 
 #endif